Abstract
This is the code for MINT sPLS-DA (and all the supporting analysis) used to make Fig. 2 (and the relevant supplementary figures/tables).
# First checking if I have the packages I need, and installing if not
# Packages requested by this analysis. Anything in this vector that is not
# found in the local library is passed to install.packages() below.
list.of.packages <- c(
  "tidyverse", "gplots", "RColorBrewer", "networkD3", "vegan", "rstatix",
  "ggalluvial", "gridExtra", "propr", "patchwork", "phyloseq", "dplyr",
  "VennDiagram", "plyr", "data.table", "vsn", "pheatmap", "ggpubr",
  "microbiome", "rmarkdown", "mixOmics", "NetCoMi", "igraph", "randomForest",
  "caret", "pdp", "rstatix"
)
# Requested packages with no match among the installed package names
new.packages <- list.of.packages[
  is.na(match(list.of.packages, rownames(installed.packages())))
]
# Only call install.packages() when something is actually missing
if (length(new.packages) > 0) {
  install.packages(new.packages)
}
# Installing some separately:
# phyloseq
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("phyloseq")
# DESeq2
# BiocManager::install("DESeq2")
# vsn
# BiocManager::install("vsn")
# Had issues installing tidyverse, so I followed this first (installed some dependencies from the terminal) and then it worked: https://community.rstudio.com/t/tidyverse-package-does-not-install/162507/6
library(tidyverse)
library(gplots)
library(RColorBrewer)
library(ggrepel)
library(units)
library(future) # Needed to use multiple processors
library(furrr) # Needed to use multiple processors
library(progressr) # to track the progress
library(networkD3) # for Sankey diagrams
library(vegan)
library(rstatix)
library(parallel) # for setting multiple CPUs
library(ggalluvial) # to create alluvial diagrams
library(gridExtra)
# library(propr) # to compute the proportionality metric!
library(patchwork)
library(phyloseq)
library(ggnewscale)
library(dplyr)
library(VennDiagram)
# if (!require(devtools)) install.packages("devtools")
# devtools::install_github("gaospecial/ggVennDiagram")
library(ggVennDiagram)
library(ggvenn)
library(plyr) # Needed for the ordination plots I did on my own
library(data.table) # For data wrangling
library(vsn) # For normalization
# library(psych) # Used to investigate correlations among predictors in RDA analysis, ran in vegan
library(RColorBrewer)
library(pheatmap)
library(ggpubr) # has ggarrange
# EcolUtils by Guillem Salazar
library(devtools)
# devtools::install_github("GuillemSalazar/EcolUtils")
library(EcolUtils)
# Install pairwiseAdonis
# devtools::install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
library(pairwiseAdonis)
# install.packages("remotes")
# remotes::install_github("microbiome/microbiome")
library(microbiome)
# install.packages("rmarkdown")
library(rmarkdown)
library(mixOmics)
# Installing the most important package - mixOmics!
# Install mixOmics
# BiocManager::install('mixOmics')
# library(pairwiseAdonis)
# Installing the NetCoMi package I will need for microbial networks: https://github.com/stefpeschel/NetCoMi
# install.packages("devtools")
# install.packages("BiocManager")
library(doParallel)
library(foreach)
# Install NetCoMi
# devtools::install_github("stefpeschel/NetCoMi",
# dependencies = c("Depends", "Imports", "LinkingTo"),
# repos = c("https://cloud.r-project.org/",
# BiocManager::repositories()))
library(NetCoMi)
library(igraph) # needed for the function below: read_graph
# Now installing propr - to compute proportionality metric (microbial networks)
# devtools::install_github("tpq/propr")
# library(propr)
# For random forest models:
library(randomForest)
library(caret) # For data splitting and model evaluation
library(pdp)
# Packages I need for the map:
library(reshape2) # This is only needed for the melt funtion
library(reshape)
library(dplyr)
library(plyr)
library(data.table)
library(ggplot2)
library(rworldmap)
library(rworldxtra)
# library(ggsn)
library(sf)
library(raster)
library(terra)
# library(rgeos)
# library(maps)
# library(maptools)
library(grid)
library(miscTools)
library(stringr)
library(ggpubr)
library(plyr)
library(gridExtra) # for arranging plots
# Loading all the libraries
library(raster)
library(phyloseq)
library(tidyverse)
library(ggspatial)
library(ggrepel) # The package ggrepel offers a very flexible approach to deal with label placement (with geom_text_repel and
# geom_label_repel), including automated movement of labels in case of overlap
# Installing AIMS packages individually
library(remotes) # Needed to install the AIMS R packages below
# remotes::install_github("https://github.com/open-AIMS/dataaimsr")
library(dataaimsr)
# remotes::install_github("https://github.com/open-AIMS/gisaimsr")
library(gisaimsr)
library(sp)
# Importing the R object: load() restores every object stored in
# Figure_1.RData into the current environment, replacing any existing object
# that has the same name.
# NOTE(review): the contents of Figure_1.RData are not visible here - confirm
# that nothing it restores clashes with objects created further down.
load("/home/marko-terzin/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/cleaned_code/Figure_1/Figure_1.RData")
# Setting the work directory. This absolute path is specific to one machine;
# any relative path used after this point resolves against it.
setwd("/home/marko-terzin/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs")
Importing the metagenomics data
# Absolute counts: one row per genome (the "genome" column becomes the row
# names), one column per sample
pMAGs_95ANI <- column_to_rownames(
  read.delim("/home/marko-terzin/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/from_Katherine_final_abundances/only_pMAGs/pMAGs.tsv"),
  var = "genome"
)
# This is after I partitioned the abundances from Katherine and removed viral
# contigs (IMOS_V), mobile elements (IMOS_M) and euk hits (IMOS_E). This should
# give me 876 pMAGs95ANI and 190 samples (46 x 4 + 3 reps (Hedley) and 3 reps
# (Davie)). Let's double check
dim(pMAGs_95ANI)
# Cleaning column names: strip every "count." occurrence from the column names
names(pMAGs_95ANI) <- gsub("count\\.", "", names(pMAGs_95ANI))
# Importing the taxa names and MAGs stats:
pMAGs_95ANI_stats <- read.csv("~/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/from_Katherine_final_abundances/only_pMAGs/Matching_names_876_MAGs_and_5000_dRep.csv")
# The full stats table is needed later for plotting genome size, GC etc. For
# the phyloseq object only the taxonomy is required: keep the bin name and the
# GTDB-Tk classification string, move the bin name into the row names, and
# split the classification on ";" into the seven ranks.
pMAGs_95ANI_stats_TAX <- pMAGs_95ANI_stats[
  , c("Katherine_names_876_bins", "GTDBtk.classification..R214.")
] %>%
  column_to_rownames(var = "Katherine_names_876_bins") %>%
  separate(
    col = "GTDBtk.classification..R214.",
    into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
    sep = ";"
  )
### Data input ###
# Adding the metadata file!
# Metadata - where I have average values of WQ and LTMP data
# I already have it from the read-based analysis, I just made sure (manually!)
# it corresponds to Yun Kit's naming
# Importing the file I used for random forest models because it doesn't have NAs
metadata_IMOS_MGD_MAGs <- read.csv("~/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/metadata_for_random_forest_models.csv")
# metadata_IMOS_MGD_MAGs <- read.csv("~/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/input_data_from_Yun_Kit/metadata_IMOS_MGD_MAGs_Katherine.csv")
### Important update on 2nd June 2025: modified 21-580 manually to say it's open to fishing. Same modification in MINT_sector_manually_edited.csv
# Now adding the sector information as well
MINT_sector_manually_edited <- read.csv("~/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/MINT_sector_manually_edited.csv")
# Merging: only the sample identifier and its sector are taken from the sector
# file; the join key is whatever columns the two tables have in common
metadata_IMOS_MGD_MAGs <- left_join(
  metadata_IMOS_MGD_MAGs,
  MINT_sector_manually_edited[, c("Sample_ID", "SECTOR")]
)
# Adding the sector value for two sites where it is missing for some reason...
# For 21-580: every Sample_ID containing "21580_" is assigned to SW
metadata_IMOS_MGD_MAGs$SECTOR[
  grepl("21580_", metadata_IMOS_MGD_MAGs$Sample_ID)
] <- "SW"
# For St Crispin: every Sample_ID containing "stcrispin_" is assigned to CA
metadata_IMOS_MGD_MAGs$SECTOR[
  grepl("stcrispin_", metadata_IMOS_MGD_MAGs$Sample_ID)
] <- "CA"
# Now adding the WQ medians!
# WQ_medians_per_reef <- read.csv("~/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/input_data_from_Yun_Kit/metadata_WQ_medians_per_reef.csv")
# Merging
# metadata_IMOS_MGD_MAGs <- left_join(metadata_IMOS_MGD_MAGs, WQ_medians_per_reef)
# Merging into a phyloseq object: abundances (taxa as rows) plus taxonomy
OTU <- otu_table(pMAGs_95ANI, taxa_are_rows = TRUE)
TAX <- tax_table(as.matrix(pMAGs_95ANI_stats_TAX))
pMAGs_95ANI_phyloseq <- phyloseq(OTU, TAX)
# Before adding the metadata to the same phyloseq object, make sure the
# Sample IDs in the metadata follow the column order of the OTU table
OTU_Sample_IDs <- data.frame(Sample_ID = colnames(OTU))
# Left join with the OTU sample IDs on the left: one metadata row per OTU
# column, in OTU column order. The key is named explicitly so the join cannot
# silently pick up another shared column and no "Joining with" message is
# emitted.
metadata_IMOS_MGD_MAGs <- left_join(
  OTU_Sample_IDs,
  metadata_IMOS_MGD_MAGs,
  by = "Sample_ID"
)
# Relabel the sector codes so that they sort from North to South:
#   Cape Grenville, Princess Charlotte Bay, Cairns, Innisfail, Townsville,
#   Swains, Capricorn Bunker
# The numeric prefix fixes the ordering. Codes that are not listed below are
# passed through by recode() unchanged.
metadata_IMOS_MGD_MAGs$SECTOR_N_S <- dplyr::recode(
  metadata_IMOS_MGD_MAGs$SECTOR,
  !!!c(
    CG = "01_Cape_Grenville",
    PC = "02_Princess_Charlotte_bay",
    CA = "03_Cairns",
    IN = "04_Innisfail",
    TO = "05_Townsville",
    SW = "06_Swains",
    CB = "07_Capricorn_Bunker"
  )
)
# Preparing the metadata to merge with the phyloseq object
sampledata <- metadata_IMOS_MGD_MAGs

# 1. Ensure we have the exact same samples in both datasets, in the column
#    order of the OTU table. The dplyr:: prefix is deliberate: plyr is attached
#    after dplyr in this script and masks arrange().
otu_samples <- colnames(otu_table(pMAGs_95ANI_phyloseq))
sampledata <- sampledata %>%
  dplyr::filter(Sample_ID %in% otu_samples) %>%
  dplyr::arrange(match(Sample_ID, otu_samples))

# 2. Verify that the samples match exactly (count first, then identity and
#    order, which also catches duplicated Sample_IDs)
if (nrow(sampledata) != length(otu_samples)) {
  stop(paste("Sample count mismatch: Metadata has", nrow(sampledata),
             "samples, OTU table has", length(otu_samples)))
}
if (!identical(as.character(sampledata$Sample_ID), otu_samples)) {
  stop("Sample_ID order in the metadata does not match the OTU table columns")
}

# 3. Create sample_data object with proper row names
sampledata <- sample_data(
  data.frame(
    sampledata,
    row.names = sampledata$Sample_ID, # Explicit row names
    stringsAsFactors = FALSE
  )
)

# 4. Final validation checks (head(, 5) so the output matches the label)
print("First 5 sample names in metadata:")
## [1] "First 5 sample names in metadata:"
print(head(rownames(sampledata), 5))
## [1] "11049_1__BBG5542_21S000057" "11049_2__BBG5543_21S000444"
## [3] "11049_3__BBG5544_21S000445" "11049_4__BBG5545_21S000446"
## [5] "11162_1__BBG5534"
print("First 5 sample names in OTU table:")
## [1] "First 5 sample names in OTU table:"
print(head(colnames(otu_table(pMAGs_95ANI_phyloseq)), 5))
## [1] "11049_1__BBG5542_21S000057" "11049_2__BBG5543_21S000444"
## [3] "11049_3__BBG5544_21S000445" "11049_4__BBG5545_21S000446"
## [5] "11162_1__BBG5534"
print("Any NA values in Sample_ID?")
## [1] "Any NA values in Sample_ID?"
print(any(is.na(sampledata$Sample_ID)))
## [1] FALSE

# validObject() signals an error on an invalid object instead of returning
# FALSE, so the failure has to be caught for the diagnostic output to run.
validity_check <- tryCatch(validObject(sampledata), error = function(e) e)
if (inherits(validity_check, "error")) {
  print("Sample data validation failed. Structure:")
  str(sampledata)
  stop("Invalid sample_data object: ", conditionMessage(validity_check),
       call. = FALSE)
}

# 5. Merge with phyloseq
pMAGs_95ANI_phyloseq <- merge_phyloseq(pMAGs_95ANI_phyloseq, sampledata)
### Data Normalisation - Centred Log Ratio (CLR) transformation ###
# CLR is the normalisation method suggested by the mixOmics R package for
# microbial data. The geometric mean cannot be determined for sparse data
# without deleting, replacing or estimating the 0 count values, so the zeros
# have to be dealt with before the CLR normalisation. Here that is done by
# introducing pseudocounts.
### Tutorial used: http://mixomics.org/mixmc/mixmc-preprocessing/
# Number of zero entries in the count table - BEFORE adding pseudocounts.
# (sum() of the logical matrix counts the zeros; summing which() would add up
# their positions instead.)
sum(as(otu_table(pMAGs_95ANI_phyloseq), "matrix") == 0)
# Pseudocounts - adding 1 to EVERY count, so each zero becomes 1.
# NOTE: re-running this line adds another pseudocount on top.
pMAGs_95ANI_phyloseq@otu_table <- pMAGs_95ANI_phyloseq@otu_table + 1
# Number of zero entries - AFTER adding pseudocounts (expected to be 0)
sum(as(otu_table(pMAGs_95ANI_phyloseq), "matrix") == 0)
# All good if this is 0: no zeros are left after introducing pseudocounts
### Now I can CLR transform:
# All data, but the correct thing should be each sector separately
pMAGs_95ANI_phyloseq_clr <- microbiome::transform(pMAGs_95ANI_phyloseq, transform = "clr")
# CLR within each sector - use this object for MINT:
# 1. Extract the sector label of every sample
sector_info <- sample_data(pMAGs_95ANI_phyloseq)$SECTOR_N_S
# A sample without a sector cannot be assigned to any subset; fail early with
# a clear message rather than inside the subsetting below.
if (anyNA(sector_info)) {
  stop("Some samples have no SECTOR_N_S value; assign a sector before the ",
       "per-sector CLR transformation", call. = FALSE)
}
sectors <- unique(sector_info)
# 2./3. For each sector: keep that sector's samples and CLR-transform them.
# prune_samples() receives the sample names explicitly, so the subsetting does
# not depend on non-standard evaluation locating a loop variable. The result
# is a list named by sector.
sector_sample_names <- sample_names(sample_data(pMAGs_95ANI_phyloseq))
sector_labels <- as.character(sectors)
phy_clr_list <- lapply(
  setNames(sector_labels, sector_labels),
  function(sector) {
    phy_subset <- prune_samples(
      sector_sample_names[sector_info == sector],
      pMAGs_95ANI_phyloseq
    )
    microbiome::transform(phy_subset, transform = "clr")
  }
)
# 4. Merge all sectors back into one phyloseq object
# NOTE(review): the merged object presumably lists samples grouped by sector
# rather than in the original order - take any grouping/response vector from
# sample_data() of THIS object, not from a separately ordered table.
pMAGs_95ANI_phyloseq_CLR_per_sector <- do.call(merge_phyloseq, phy_clr_list)
# 5. Verify the output
head(otu_table(pMAGs_95ANI_phyloseq_CLR_per_sector)) # Should show CLR values
### IMPORTANT: use pMAGs_95ANI_phyloseq_CLR_per_sector only for MINT!!
Principal Components Analysis (PCA) was applied using the R package mixOmics (Rohart et al. 2017b) as an unsupervised approach to visualise the main clustering patterns between reef sites based on microbial community profiles. The optimal number of PCA components was determined using the tune.pca() function in mixOmics (Rohart et al. 2017b).
# Abundance matrix taken from the CLR-transformed phyloseq object
# (pMAGs_95ANI_phyloseq_clr), i.e. these are CLR values, not raw counts.
MAGs <- as.data.frame(pMAGs_95ANI_phyloseq_clr@otu_table)
# Transpose so that samples are rows and MAGs are columns
MAGs <- t(MAGs)
# Check dimensions of data
dim(MAGs)
class(MAGs)
# Minimal code - mixOmics, PCA (all pca() arguments left at their defaults)
result.pca <- pca(MAGs)
# Sample plot coloured by sampling trip, with one ellipse per trip
pca_MAGs_95 <- plotIndiv(
  result.pca,
  group = sample_data(pMAGs_95ANI_phyloseq_clr)$Sampling_trip,
  title = 'PCA | 876 IMOS GBR-MGD MAGs - (95% ANI)',
  legend = TRUE,
  ellipse = TRUE,
  ind.names = FALSE,
  col.per.group = c("indianred",  # Sampling trip 1
                    "indianred4", # Sampling trip 2
                    "red3",       # Sampling trip 3
                    "slateblue"), # Sampling trip 4
  legend.title = 'Sampling trip'
)
Main clustering patterns of seawater microbial communities. The PCA ordination plots show clear differences between microbial communities sampled during the summer/wet season (red) and winter/dry season (blue), with 50% of variance being attributable to dimension 1. Samples collected in the peak of summer (Trip 3) additionally separate from early summer sampling (Trips 1 and 2) on PCA dimension 2.
# I will need this later for plotting
# Abundances joined to taxonomy, one row per MAG. Built once (with an explicit
# key) and reused for the three label vectors below, which all follow the row
# order of this table.
MAG_abundance_and_taxonomy <- left_join(
  otu_table(pMAGs_95ANI_phyloseq_clr) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  tax_table(pMAGs_95ANI_phyloseq_clr) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  by = "OTU"
)
# Defining MAG names - full taxonomy, prefixed with the MAG identifier
# NOTE(review): Phylum is not part of this label although the comment calls it
# the full taxonomy - confirm whether that omission is intended.
MAGs.full.name.and.OTU <- as.character(
  tidyr::unite(
    MAG_abundance_and_taxonomy,
    taxonomy,
    c(OTU, Domain, Class, Order, Family, Genus, Species),
    sep = "; "
  )$taxonomy
)
# Exporting the names of taxa now, and making them short - easier plotting!
# Class to Species
MAGs.name.short <- as.character(
  tidyr::unite(
    MAG_abundance_and_taxonomy,
    taxonomy,
    c(Class, Order, Family, Genus, Species),
    sep = "; "
  )$taxonomy
)
# Shortest variant: Family to Species
MAGs.name.super.short <- as.character(
  tidyr::unite(
    MAG_abundance_and_taxonomy,
    taxonomy,
    c(Family, Genus, Species),
    sep = "; "
  )$taxonomy
)
# Same PCA result, now coloured by protection status (open vs closed to
# fishing) with the point shape encoding the sector
pca_open_closed <- plotIndiv(
  result.pca,
  group = sample_data(pMAGs_95ANI_phyloseq_clr)$Open_or_Closed_to_fishing,
  pch = as.factor(sample_data(pMAGs_95ANI_phyloseq_clr)$SECTOR),
  title = 'PCA | 876 IMOS-MGD MAGs (95% ANI)',
  legend = TRUE,
  ellipse = TRUE,
  ind.names = FALSE,
  col.per.group = c("seagreen3",   # Closed to fishing
                    "steelblue4"), # open to fishing
  legend.title = 'Reef Protection Status'
)
PCA on its own does not discriminate between zones: sites cluster based on geographic proximity (i.e. sector) and time (i.e. sampling trip). Will sPLS-DA, as a supervised approach, improve the separation between zones?
The perf() function evaluates the performance of PLS-DA - i.e., its ability to correctly classify ‘new’ samples into their category (no-take and take zones) using repeated cross-validation. We initially choose a large number of components (here ncomp = 10) and assess the model as we gradually increase the number of components. Here we use 4-fold CV repeated 50 times.
# Class labels (open vs closed to fishing) taken from the same phyloseq object
# that MAGs was extracted from, so predictors and response share one sample
# order by construction.
open_closed_status <- sample_data(pMAGs_95ANI_phyloseq_clr)$Open_or_Closed_to_fishing
stopifnot(
  length(open_closed_status) == nrow(MAGs),
  !anyNA(open_closed_status)
)
# PLS-DA with a deliberately large number of components; perf() below is used
# to assess how performance changes as components are added
plsda.open.closed <- mixOmics::plsda(X = MAGs,
                                     Y = open_closed_status,
                                     ncomp = 10)
# The cross-validation folds are drawn at random: fix the seed so the
# performance estimates can be reproduced.
set.seed(42)
perf.plsda.open.closed <- perf(plsda.open.closed,
                               validation = 'Mfold',
                               folds = 4,
                               # Show the progress bar only in an interactive
                               # session; in a knitted report it would be
                               # written into the output
                               progressBar = interactive(),
                               nrepeat = 50) # We suggest nrepeat = 50 (at least)
##
## comp 1
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 2
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 3
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 4
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 5
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 6
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 7
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 8
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 9
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## comp 10
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | 
|==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | 
|====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | 
|==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
# Plot the performance results per number of components, with
# standard-deviation bars and a horizontally laid-out legend.
plot(
  perf.plsda.open.closed,
  sd = TRUE,
  legend.position = "horizontal"
)
Tuning the number of components in PLS-DA on the IMOS-MGD MAG data (876 MAGs, drep at 95% ANI). For each component, repeated cross-validation (50 ×4−fold CV) is used to evaluate the PLS-DA classification performance (overall and balanced error rate BER, and for each type of prediction distance: max.dist, centroids.dist and mahalanobis.dist) to discriminate between no-take and take zones based on the seawater microbiome. Bars show the standard deviation across the repeated folds. The plot shows that the error rate keeps dropping as we increase the number of components, but I will retain 2 dimensions in the final PLS-DA model (1) to avoid overfitting, and because (2) I am only aiming to discriminate between two categorical outcomes.
This is odd: the error rate keeps decreasing as more components are added, even though 1-2 components should be enough to discriminate between two categorical outcomes.
We now run our final PLS-DA model that includes 2 components:
# Fit the final PLS-DA model with 2 components: the MAG table is the
# predictor (X) and the open/closed-to-fishing label is the class vector (Y).
final.plsda.open.closed <- mixOmics::plsda(
  X = MAGs,
  Y = metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing,
  ncomp = 2
)
# Sample plot of the final PLS-DA model on components 1 and 2: points are
# coloured by reef protection status and shaped by sampling trip, with
# ellipses drawn per group.
# NOTE(review): colour/shape variables are read from the phyloseq sample data,
# whereas the model was fitted with `metadata_IMOS_MGD_MAGs`; this assumes both
# list the samples in the same order -- TODO confirm.
# NOTE(review): the next plotIndiv() call in this script assigns to the same
# name, so this object is overwritten right after it is drawn.
pls.da.open.closed <- plotIndiv(
  final.plsda.open.closed,
  comp = c(1, 2),
  ind.names = FALSE,
  group = sample_data(pMAGs_95ANI_phyloseq_clr)$Open_or_Closed_to_fishing,
  # pch.levels = sample_data(pMAGs_95ANI_phyloseq_clr)$Sampling_trip,
  pch = as.factor(sample_data(pMAGs_95ANI_phyloseq_clr)$Sampling_trip),
  col.per.group = c(
    "seagreen3", # Closed to fishing
    "steelblue4" # Open to fishing
  ),
  ellipse = TRUE,
  legend = TRUE,
  legend.title = "Reef Protection Status",
  # No trailing comma after the last argument: a dangling comma would pass an
  # empty (missing) argument to plotIndiv().
  title = "PLS-DA on IMOS-MGD MAGs (876, 95% ANI derep)"
  # X.label = "PLS-DA comp 1",
  # Y.label = "PLS-DA comp 2"
)
# Same PLS-DA sample plot (components 1 and 2), but now coloured by sampling
# trip and shaped by reef protection status, to inspect the trip effect.
# NOTE(review): colour/shape variables are read from the phyloseq sample data,
# whereas the model was fitted with `metadata_IMOS_MGD_MAGs`; this assumes both
# list the samples in the same order -- TODO confirm.
# NOTE(review): this assignment reuses the name of the preceding plotIndiv()
# result, so only this (trip-coloured) plot object is kept afterwards.
pls.da.open.closed <- plotIndiv(
  final.plsda.open.closed,
  comp = c(1, 2),
  ind.names = FALSE,
  group = sample_data(pMAGs_95ANI_phyloseq_clr)$Sampling_trip,
  # pch.levels = sample_data(pMAGs_95ANI_phyloseq_clr)$Sampling_trip,
  pch = as.factor(sample_data(pMAGs_95ANI_phyloseq_clr)$Open_or_Closed_to_fishing),
  col.per.group = c(
    "indianred", # Sampling trip 1
    "indianred4", # Sampling trip 2
    "red3", # Sampling trip 3
    "slateblue" # Sampling trip 4
  ),
  ellipse = TRUE,
  legend = TRUE,
  legend.title = "Sampling trip",
  # No trailing comma after the last argument: a dangling comma would pass an
  # empty (missing) argument to plotIndiv().
  title = "PLS-DA on IMOS-MGD MAGs (876, 95% ANI derep)"
  # X.label = "PLS-DA comp 1",
  # Y.label = "PLS-DA comp 2"
)
We can observe improved clustering according to reef protection status (top), compared with PCA. This is to be expected since the PLS-DA model includes the class information of each sample, and aims to discriminate between them. From the plotIndiv() function, we observe some discrimination between no-take and take zones mostly on component 1 (x-axis); however, we can still see the trip effect (bottom). The axis labels indicate the amount of variation explained per component; however, the interpretation of this amount is not as important as in PCA, as PLS-DA aims to maximise the covariance between components associated with X (predictor dataset, i.e. the 876 IMOS-MGD MAGs) and Y (categorical “response”, i.e. no-take and take zones), rather than the variance of X (as done in PCA).
We can rerun a more extensive performance evaluation with more repeats for our final model:
# Performance evaluation of the final 2-component PLS-DA model:
# 4-fold cross-validation ("Mfold") repeated 50 times, with the progress
# bar switched on to track the run.
perf.final.plsda.open.closed <- perf(
  final.plsda.open.closed,
  validation = "Mfold",
  folds = 4,
  nrepeat = 50,
  progressBar = TRUE
)
Numerical outputs of interest include the final overall performance for 2 components (balanced error rate, max.dist):
# Balanced error rate (BER) per component for the max.dist prediction distance
perf.final.plsda.open.closed[["error.rate"]][["BER"]][, "max.dist"]
## comp1 comp2
## 0.3854246 0.2816495
We can also get the error rate per class across each component:
# Per-class error rate on each component for the max.dist prediction distance
perf.final.plsda.open.closed[["error.rate.class"]][["max.dist"]]
## comp1 comp2
## C 0.4617582 0.2758242
## O 0.3090909 0.2874747
As many of the 876 MAGs in X may be noisy or uninformative to discriminate between no-take and take reefs, an sPLS-DA analysis (sparse variant) may help refine the sample clusters and select a small subset of variables relevant to discriminate each class.
We estimate the classification error rate with respect to the number of selected variables in the model with the function tune.splsda(). The tuning is being performed one component at a time inside the function and the optimal number of variables to select is automatically retrieved after each component run.
Previously, we determined the optimal number of components to be ncomp = 2 with PLS-DA. Here we set ncomp = 8 to further assess whether this also holds for a sparse model, and use 10-fold cross-validation repeated 50 times.
We first define a grid of keepX values. For example here, we define a fine grid at the start, and then specify a coarser, larger sequence of values:
# Candidate keepX values tested on every component: a fine grid covering
# each integer from 1 to 10, then a coarser grid from 20 to 150 in steps of 10.
list.keepX <- c(seq_len(10), seq(from = 20, to = 150, by = 10))
list.keepX
## [1] 1 2 3 4 5 6 7 8 9 10 20 30 40 50 60 70 80 90 100
## [20] 110 120 130 140 150
# Tune the sPLS-DA model: for up to 8 components, test every keepX value in
# `list.keepX` using 10-fold cross-validation ("Mfold") repeated 50 times.
# The progress bar is switched on to track the run.
tune.splsda.open.closed <- tune.splsda(
  X = MAGs,
  Y = metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing,
  ncomp = 8,
  test.keepX = list.keepX,
  validation = "Mfold",
  folds = 10,
  nrepeat = 50,
  # dist = "max.dist",
  progressBar = TRUE
)
##
## comp 1
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
## comp 2
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
## comp 3
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
## comp 4
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
## comp 5
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
## comp 6
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
## comp 7
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
## comp 8
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
The following command line will output the mean error rate for each component and each tested keepX value given the past (tuned) components.
# Preview the first rows of the mean classification error rate
# (tested keepX values in rows, components in columns)
head(tune.splsda.open.closed[["error.rate"]])
## comp1 comp2 comp3 comp4 comp5 comp6 comp7
## 1 0.3465901 0.2774625 0.2301887 0.1961860 0.1490299 0.1189066 0.10095682
## 2 0.3414441 0.2816839 0.2294705 0.1881141 0.1489022 0.1168132 0.09888667
## 3 0.3308625 0.2829726 0.2298002 0.1785037 0.1481507 0.1156044 0.09744034
## 4 0.3294250 0.2783594 0.2312288 0.1720069 0.1470785 0.1141103 0.09601177
## 5 0.3286469 0.2723443 0.2314308 0.1676701 0.1468232 0.1147075 0.09552781
## 6 0.3288722 0.2669231 0.2318881 0.1646431 0.1469808 0.1141581 0.09593185
## comp8
## 1 0.09633922
## 2 0.09500833
## 3 0.09370740
## 4 0.09355311
## 5 0.09351759
## 6 0.09278388
This output globally shows that the classification error rate continues to decrease after the second component in sparse PLS-DA. However, when discriminating between only two categorical outcomes, 1 or 2 components should be kept to avoid overfitting.
We display the mean classification error rate on each component, bearing in mind that each component is conditional on the previous components calculated with the optimal number of selected variables. The diamond in the figure below indicates the best keepX value to achieve the lowest error rate per component.
# Plot the tuning results; sd = TRUE adds the error bars across the repeats
plot(tune.splsda.open.closed, sd = TRUE)
Tuning keepX for the sPLS-DA performed on the IMOS GBR-MGD MAGs (876 genomes, drep at 95% ANI). Each coloured line represents the balanced error rate (y-axis) per component across all tested keepX values (x-axis) with the standard deviation based on the repeated cross-validation folds. The diamond indicates the optimal keepX value on a particular component which achieves the lowest classification error rate as determined with a one-sided t−test. As sPLS-DA is an iterative algorithm, values represented for a given component (e.g. comp 1 to 2) include the optimal keepX value chosen for the previous component (comp 1).
The tuning results depend on the tuning grid list.keepX, as well as the values chosen for folds and nrepeat. Therefore, we recommend assessing the performance of the final model, as well as examining the stability of the selected variables across the different folds, as detailed in the next section.
The figure above shows that the error rate decreases when more components are included in sPLS-DA. To obtain a more reliable estimation of the error rate, the number of repeats should be increased (between 50 and 100). This type of graph helps not only to choose the ‘optimal’ number of variables to select, but also to confirm the number of components ncomp. From the code below, we can assess that the addition of an eighth component does not improve the classification (no statistically significant improvement according to a one-sided t−test), hence we choose ncomp = 7.
# Number of components retained by the one-sided t-tests during tuning
tune.splsda.open.closed[["choice.ncomp"]][["ncomp"]]
## [1] 7
# keepX value achieving the minimal error rate on each tested component
tune.splsda.open.closed[["choice.keepX"]]
## comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8
## 5 30 150 20 150 140 5 30
Here is our final sPLS-DA model with the optimal number of components and features (keepX) obtained from our tuning step.
# Store the tuned number of components (chosen by t-tests on the error rate)
# for use in the final model, then display it
ncomp <- tune.splsda.open.closed[["choice.ncomp"]][["ncomp"]]
print(ncomp)
## [1] 7
# Optimal number of variables to select: keep the tuned keepX values for the
# first `ncomp` components only. seq_len() is used rather than 1:ncomp so that
# a zero-length selection can never silently expand to c(1, 0).
select.keepX <- tune.splsda.open.closed$choice.keepX[seq_len(ncomp)]
select.keepX
## comp1 comp2 comp3 comp4 comp5 comp6 comp7
## 5 30 150 20 150 140 5
# Fit the final sPLS-DA model with the tuned parameters.
# To skip the tuning step, assign your own arbitrarily chosen values to
# ncomp and select.keepX before running this call.
final.splsda.open.closed <- mixOmics::splsda(
  X = MAGs,
  Y = metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing,
  keepX = select.keepX,
  ncomp = ncomp
)
The performance of the model with the ncomp and keepX parameters is assessed with the perf() function. We use 4-fold validation (folds = 4), repeated 50 times (nrepeat = 50).
The classification error rates that are output include both the overall error rate, as well as the balanced error rate (BER) when the number of samples per group is not balanced - as is the case in this study.
# Assess the final model with repeated M-fold cross-validation
# (4 folds, 50 repeats; a progress bar is printed while it runs)
perf.final.splsda.open.closed <- perf(
  final.splsda.open.closed,
  validation = "Mfold",
  folds = 4,
  nrepeat = 50,
  progressBar = TRUE
)
# perf.final.splsda.open.closed # Lists the different outputs
# Overall and balanced (BER) error rates per component and prediction distance
perf.final.splsda.open.closed$error.rate
## $overall
## max.dist centroids.dist mahalanobis.dist
## comp1 0.3285263 0.3223158 0.3223158
## comp2 0.2477895 0.2894737 0.2474737
## comp3 0.2310526 0.3173684 0.2292632
## comp4 0.1648421 0.2978947 0.1656842
## comp5 0.1371579 0.2710526 0.1364211
## comp6 0.1211579 0.2544211 0.1211579
## comp7 0.1169474 0.2652632 0.1173684
##
## $BER
## max.dist centroids.dist mahalanobis.dist
## comp1 0.3291853 0.3221778 0.3221778
## comp2 0.2482295 0.2900588 0.2474292
## comp3 0.2329948 0.3164891 0.2305939
## comp4 0.1670973 0.2974914 0.1677189
## comp5 0.1387379 0.2693185 0.1378621
## comp6 0.1217238 0.2530481 0.1216262
## comp7 0.1171950 0.2640737 0.1175014
We can also examine the error rate per class:
# Per-class error rates for each prediction distance and component
perf.final.splsda.open.closed[["error.rate.class"]]
## $max.dist
## comp1 comp2 comp3 comp4 comp5 comp6 comp7
## C 0.3448352 0.2586813 0.2791209 0.2206593 0.1762637 0.1351648 0.1230769
## O 0.3135354 0.2377778 0.1868687 0.1135354 0.1012121 0.1082828 0.1113131
##
## $centroids.dist
## comp1 comp2 comp3 comp4 comp5 comp6 comp7
## C 0.3189011 0.3039560 0.2956044 0.2879121 0.2281319 0.2204396 0.2358242
## O 0.3254545 0.2761616 0.3373737 0.3070707 0.3105051 0.2856566 0.2923232
##
## $mahalanobis.dist
## comp1 comp2 comp3 comp4 comp5 comp6 comp7
## C 0.3189011 0.2463736 0.2621978 0.2160440 0.1720879 0.1327473 0.1206593
## O 0.3254545 0.2484848 0.1989899 0.1193939 0.1036364 0.1105051 0.1143434
These results can be compared with the performance of PLS-DA and show the benefits of variable selection to not only obtain a parsimonious model, but also to improve the classification error rate (overall and per class).
During the repeated cross-validation process in perf() we can record how often the same variables are selected across the folds. This information is important to answer the question: How reproducible is my molecular signature when the training set is perturbed via cross-validation?
# Two side-by-side panels: one stability barplot per component
par(mfrow = c(1, 2))

# Component 1: selection frequencies recorded by perf()
stable.comp1 <- perf.final.splsda.open.closed$features$stable$comp1
barplot(
  stable.comp1,
  main = "Feature stability for comp = 1",
  xlab = "variables selected across CV folds",
  ylab = "Stability frequency"
)

# Component 2: selection frequencies recorded by perf()
stable.comp2 <- perf.final.splsda.open.closed$features$stable$comp2
barplot(
  stable.comp2,
  main = "Feature stability for comp = 2",
  xlab = "variables selected across CV folds",
  ylab = "Stability frequency"
)
Stability of variable selection from the sPLS-DA on the IMOS GBR-MGD MAGs data (876 MAGs, drep at 95% ANI). We use a by-product from perf() to assess how often the same MAGs are selected for a given keepX value in the final sPLS-DA model. The barplot represents the frequency of selection across repeated CV folds for each selected MAG for components 1 and 2. The MAGs are ranked according to decreasing frequency.
# Switch the plotting layout back to a single panel
par(mfrow=c(1,1))
The function selectVar() outputs the variables (pMAGs) selected for a given component and their loading values (ranked in decreasing absolute value). We concatenate those results with the feature stability, as shown here for variables selected on component 1:
# Variables selected on component 1 (names and loading values)
selected.comp1 <- selectVar(final.splsda.open.closed, comp = 1)
select.name <- selected.comp1$name
# Selection frequencies of those same variables, taken from perf()
stability <- perf.final.splsda.open.closed$features$stable$comp1[select.name]
# First rows of the loading values next to their stability
head(cbind(selected.comp1$value, stability))
# Sample plot on components 1 and 2, grouped (coloured) by reef protection
# status, with the point shape taken from the sampling trip
clr.sample.data <- sample_data(pMAGs_95ANI_phyloseq_clr)
spls.da.open.closed <- plotIndiv(
  final.splsda.open.closed,
  comp = c(1, 2),
  group = clr.sample.data$Open_or_Closed_to_fishing,
  pch = as.factor(clr.sample.data$Sampling_trip),
  col.per.group = c(
    "seagreen3", # Closed to fishing
    "steelblue4" # open to fishing
  ),
  ind.names = FALSE,
  ellipse = TRUE,
  legend = TRUE,
  legend.title = "Reef Protection Status",
  # star = TRUE,
  title = "IMOS-MGD MAGs open/closed, sPLS-DA comp 1 - 2"
)
# Same sample plot, but grouped (coloured) by sampling trip, with the point
# shape taken from the reef protection status
clr.sample.data <- sample_data(pMAGs_95ANI_phyloseq_clr)
spls.da.open.closed.per.trip <- plotIndiv(
  final.splsda.open.closed,
  comp = c(1, 2),
  group = clr.sample.data$Sampling_trip,
  pch = as.factor(clr.sample.data$Open_or_Closed_to_fishing),
  col.per.group = c(
    "indianred",  # Sampling trip 1
    "indianred4", # Sampling trip 2
    "red3",       # Sampling trip 3
    "slateblue"   # Sampling trip 4
  ),
  ind.names = FALSE,
  ellipse = TRUE,
  legend = TRUE,
  legend.title = "AIMS-LTMP Sampling transect",
  # star = TRUE,
  title = "IMOS-MGD MAGs open/closed, sPLS-DA comp 1 - 2"
)
Are these sPLS-DA sample plots different than the PLS-DA plot? They are showing similar patterns - while we do see some clustering between the zones based on selected features, we still see strong spatiotemporal effects as well.
We represent the pMAGs selected with sPLS-DA on the correlation circle plot. Here, to ease interpretation, we specify the argument var.names as truncated MAG names (MAGs.name.super.short). We also adjust the size of the font with the argument cex.
# Correlation circle plot for components 1 and 2, labelled with the
# shortened MAG names
plotVar(
  final.splsda.open.closed,
  var.names = list(MAGs.name.super.short),
  comp = c(1, 2),
  cex = 3
)
Correlation circle plot representing the MAGs selected by sPLS-DA performed on the 876 IMOS GBR-MGD MAGs (drep at 95% ANI). MAG names are truncated, and we only show the MAGs selected by sPLS-DA in components 1 and 2. This graphic should be interpreted in conjunction with the sample plot.
By considering both the correlation circle plot and the sample plot, we can identify groups of pMAGs that are positively or negatively correlated with component 1 and with component 2. These pMAGs are likely to characterise the reef protection classes (open or closed to fishing) whose samples are located in the same direction on the sample plot.
This interpretation can be further examined with the plotLoadings() function. In this plot, the loading weights of each selected variable on each component are represented. The colours indicate the group in which the abundance of the selected pMAGs is maximal based on the mean (method = ‘median’ is also available for skewed data). For example on component 1:
# Loading weights of the variables selected on component 1; bars are coloured
# by the class with the highest mean (contrib = "max", method = "mean")
plotLoadings(
  final.splsda.open.closed,
  comp = 1,
  contrib = "max",
  method = "mean",
  name.var = MAGs.name.super.short
)
Loading plot of the MAGs selected by sPLS-DA on component 1 on the 876 IMOS GBR-MGD MAGs. MAGs are ranked according to their loading weight (most important at the bottom to least important at the top), represented as a barplot. Colours indicate the class (i.e. no-take or take reefs) in which a particular MAG is maximally enriched on average. The plot helps to further characterise the molecular signatures and should be interpreted jointly with the sPLS-DA sample plot.
To complete the visualisation, the CIM in this special case is a simple hierarchical heatmap representing abundance values of the MAGs selected across all components with respect to each sample. Here we use an Euclidean distance with Complete agglomeration method, and we specify the argument row.sideColors to colour the samples according to their sampling trip.
# One colour per AIMS-LTMP sampling trip, listed in factor-level order
cols.LTMP.trips <- c(
  "indianred",  # Sampling trip 1
  "indianred4", # Sampling trip 2
  "red3",       # Sampling trip 3
  "slateblue"   # Sampling trip 4
)
# Map every sample to the colour of its sampling trip by indexing the colour
# vector directly. palette(cols)[...] must not be used for this: palette()
# returns the palette that was active *before* the call, so it only yields the
# intended colours when the statement is executed twice in a row. Indexing the
# vector also leaves the session-wide graphics palette untouched.
LTMP.trips.cols <- cols.LTMP.trips[
  as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Sampling_trip))
]

# Clustered image map of the variables selected on components 1 and 2, with
# the row side bar coloured by sampling trip.
# NOTE(review): the first element of `legend` is unnamed and built with
# color.mixo(), whereas the side bar uses LTMP.trips.cols - confirm that the
# legend displays the intended labels and colours.
cim(
  final.splsda.open.closed,
  comp = 1:2,
  # cutoff = 0.95,
  col.names = MAGs.name.short,
  margins = c(
    37, # bottom
    25  # right
  ),
  keysize = c(1, 0.4),
  # row.sideColors = color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))),
  row.names = TRUE,
  legend = list(
    color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Sampling_trip))),
    title = "AIMS LTMP trips"
  ),
  row.sideColors = LTMP.trips.cols,
  title = "sPLS-DA, components 1 and 2",
  xlab = "Indicator MAGs that discriminate between take and no take zones",
  ylab = "Reef sites"
)
Clustered Image Map of the MAGs selected by sPLS-DA on the 876 IMOS GBR-MGD MAGs on the first 2 components. A hierarchical clustering based on the MAG enrichment values for the selected MAGs, with reef sites in rows coloured according to their sampling trip (and clustered using Euclidean distance with Complete agglomeration method).
# One colour per reef protection class, listed in factor-level order
cols.open.closed <- c("seagreen3", "steelblue4")
# Map every sample to the colour of its protection class by indexing the
# colour vector directly. palette(cols)[...] must not be used for this:
# palette() returns the palette that was active *before* the call, so it only
# yields the intended colours when the statement is executed twice in a row.
# Indexing the vector also leaves the session-wide graphics palette untouched.
open.closed.cols <- cols.open.closed[
  as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))
]

# Clustered image map of the variables selected on components 1 and 2, with
# the row side bar coloured by reef protection status.
# NOTE(review): the first element of `legend` is unnamed and built with
# color.mixo(), whereas the side bar uses open.closed.cols - confirm that the
# legend displays the intended labels and colours.
cim(
  final.splsda.open.closed,
  comp = 1:2,
  # cutoff = 0.95,
  col.names = MAGs.name.short,
  margins = c(
    37, # bottom
    25  # right
  ),
  keysize = c(1, 0.4),
  # row.sideColors = color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))),
  row.names = TRUE,
  legend = list(
    color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))),
    title = "Reef Protection Status"
  ),
  row.sideColors = open.closed.cols,
  title = "sPLS-DA, components 1 and 2",
  xlab = "Indicator MAGs that discriminate between take and no take zones",
  ylab = "Reef sites"
)
Clustered Image Map of the MAGs selected by sPLS-DA on the 876 IMOS GBR-MGD MAGs on the first 2 components. A hierarchical clustering based on the MAG enrichment values for the selected MAGs, with reef sites in rows coloured according to their reef protection status (and clustered using Euclidean distance with Complete agglomeration method).
Overall, the (s)PLS-DA model is clearly selecting for winter-specific taxa, and we still see strong batch effects of seasonality and to a lesser extent geography. For monitoring purposes, it would be of higher utility to identify seawater biomarkers that would be stable indicators of reef zoning across the Great Barrier Reef, and regardless of season and geography.
In Terzin et al. 2025 (Microbiome) we used Multivariate INTegration (Rohart et al. 2017) to identify shared microbial indicators across the four sampling trips in our data, however, this study will also contain AIMS-LTMP benthic cover metrics, and since LTMP data has historically been analysed across GBR sectors, here we will select SECTOR as separate studies in MINT.
Let’s mine for stable microbial biomarkers.
The perf() function is used to estimate the performance of the MINT-sPLS-DA model using Leave One Group Out Cross Validation (LOGOCV), and to choose the optimal number of components for our final model
# Build the MINT input table from the per-sector CLR object.
### Important: pMAGs_95ANI_phyloseq_CLR_per_sector holds the data that were
### CLR-transformed within each sector separately. The object MAGs (used when
### sector is not accounted for: PCA, sPLS-DA, etc.) should instead be made
### from pMAGs_95ANI_phyloseq_clr.
MAGs_MINT <- t(as.data.frame(pMAGs_95ANI_phyloseq_CLR_per_sector@otu_table))
# Inspect the dimensions of the transposed table
dim(MAGs_MINT)
## [1] 190 876
# Check the class of the transposed table
class(MAGs_MINT)
## [1] "matrix" "array"
# This was already done in the code above - CLR normalised across all data
# MAGs <- as.data.frame(pMAGs_95ANI_phyloseq_clr@otu_table)
# MAGs <- t(MAGs)
# Check dimensions of data
# dim(MAGs)
# class(MAGs)
# MINT sPLS-DA with 10 components, treating each sampling trip as a study
sector.clr.metadata <- pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data
mint.splsda.open.closed <- mint.splsda(
  X = MAGs_MINT,
  Y = sector.clr.metadata$Open_or_Closed_to_fishing,
  study = sector.clr.metadata$Sampling_trip,
  ncomp = 10
)
# Estimate the model performance and plot the error rate per component
perf.mint.splsda.open.closed <- perf(mint.splsda.open.closed)
plot(perf.mint.splsda.open.closed)
Choosing the number of components in mint.splsda using perf() with LOGOCV to discriminate between reefs that are open or closed to fishing. Classification error rates (overall and balanced - BER) are represented on the y-axis with respect to the number of components on the x-axis for each prediction distance. Overall and balanced error rates show the same trend as the design is balanced (i.e. the same number of protected and non-protected reefs in each sampling trip). The plot shows that the error rate reaches a minimum with three dimensions with the centroids prediction distance. We therefore retained 3 PCs in downstream analysis.
# Re-running MINT sPLS-DA with 10 components, this time treating each
# sector (SECTOR_N_S) as a study
sector.clr.metadata <- pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data
mint.splsda.open.closed.sector <- mint.splsda(
  X = MAGs_MINT,
  Y = sector.clr.metadata$Open_or_Closed_to_fishing,
  study = sector.clr.metadata$SECTOR_N_S,
  ncomp = 10
)
# Estimate the model performance and plot the error rate per component
perf.mint.splsda.open.closed.sector <- perf(mint.splsda.open.closed.sector)
plot(perf.mint.splsda.open.closed.sector)
Choosing the number of components in mint.splsda using perf() with LOGOCV to discriminate between reefs that are open or closed to fishing. Classification error rates (overall and balanced - BER) are represented on the y-axis with respect to the number of components on the x-axis for each prediction distance. Overall and balanced error rates show the same trend as the design is balanced (i.e. the same number of protected and non-protected reefs in each sampling trip). The plot shows that the error rate reaches a minimum with three dimensions with the centroids prediction distance. We therefore retained 3 PCs in downstream analysis.
# Split the (untransformed) phyloseq object into two sets of samples: every
# sector except "06_Swains", and the "06_Swains" samples on their own.
# Filtering out the samples from Swains - this will be the query
pMAGs_95ANI_phyloseq_no_Swains <- subset_samples(pMAGs_95ANI_phyloseq, SECTOR_N_S != "06_Swains")
pMAGs_95ANI_phyloseq_Swains_only <- subset_samples(pMAGs_95ANI_phyloseq, SECTOR_N_S == "06_Swains")
# Need to CLR transform again - within each sector:
# 1. Extract sample data and sectors
sector_info <- sample_data(pMAGs_95ANI_phyloseq_no_Swains)$SECTOR_N_S
sectors <- unique(sector_info)
# 2. Initialize an empty list to store CLR-transformed subsets
phy_clr_list <- list()
# 3. Loop through each sector, apply CLR, and store results
# NOTE(review): subset_samples() evaluates its condition with non-standard
# evaluation; `sector` is presumably found because this loop runs at top
# level. Confirm the lookup still works before moving this into a function
# or an lapply() call.
for (sector in sectors) {
# Subset phyloseq by sector
phy_subset <- subset_samples(pMAGs_95ANI_phyloseq_no_Swains, SECTOR_N_S == sector)
# Apply CLR to the subset (with pseudo-count to avoid zeros)
# NOTE(review): the pseudo-count is assumed to be added inside
# microbiome::transform() - confirm against the microbiome documentation.
phy_subset_clr <- microbiome::transform(phy_subset, transform = "clr")
# Store in list
phy_clr_list[[sector]] <- phy_subset_clr
}
# 4. Merge all sectors back into one phyloseq object
# (each list element is passed to merge_phyloseq() as a separate argument)
pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains <- do.call(merge_phyloseq, phy_clr_list)
# 5. Verify the output
head(otu_table(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains)) # Should show CLR values
## OTU Table: [6 taxa and 170 samples]
## taxa are rows
## 11049_1__BBG5542_21S000057 11049_2__BBG5543_21S000444
## 11049_1__concoct_009 3.370796 2.9741482
## 11049_1__concoct_027_sub 3.123710 3.1817013
## 11049_1__concoct_068 5.768237 5.3120043
## 11049_1__concoct_084 1.684442 0.9877569
## 11049_1__concoct_129_sub 2.463162 2.2821420
## 11049_1__concoct_161_sub 2.222484 2.0688801
## 11049_3__BBG5544_21S000445 11049_4__BBG5545_21S000446
## 11049_1__concoct_009 3.2916897 3.252276
## 11049_1__concoct_027_sub 2.9725147 2.818456
## 11049_1__concoct_068 5.4217023 5.678063
## 11049_1__concoct_084 0.9842408 1.064172
## 11049_1__concoct_129_sub 2.4067804 2.416136
## 11049_1__concoct_161_sub 2.3021121 2.304723
## 11162_1__BBG5534 11162_2__BBG5535 11162_3__BBG5536
## 11049_1__concoct_009 2.0948240 2.199595 2.3759332
## 11049_1__concoct_027_sub 1.8279075 2.634036 2.6258766
## 11049_1__concoct_068 4.9349705 5.612005 5.5430439
## 11049_1__concoct_084 0.8662672 1.296417 0.9540593
## 11049_1__concoct_129_sub 1.8090950 1.022383 1.2873464
## 11049_1__concoct_161_sub 2.4930078 2.170096 2.2373637
## 11162_4__BBG5537 lagoon_1__BBG5466_21S000059
## 11049_1__concoct_009 2.3110268 -3.5442912
## 11049_1__concoct_027_sub 3.9519461 6.1133213
## 11049_1__concoct_068 5.1225528 2.3098565
## 11049_1__concoct_084 0.3635965 -3.0732106
## 11049_1__concoct_129_sub 1.2039118 -5.1711209
## 11049_1__concoct_161_sub 1.9742170 -0.1809867
## lagoon_2__BBG5467_21S000391
## 11049_1__concoct_009 -4.406179
## 11049_1__concoct_027_sub 4.846936
## 11049_1__concoct_068 1.478775
## 11049_1__concoct_084 -3.822232
## 11049_1__concoct_129_sub -3.766142
## 11049_1__concoct_161_sub 0.275313
## lagoon_3__BBG5468_21S000392
## 11049_1__concoct_009 -3.5364269
## 11049_1__concoct_027_sub 6.5720419
## 11049_1__concoct_068 2.1125473
## 11049_1__concoct_084 -2.8533301
## 11049_1__concoct_129_sub -6.3498376
## 11049_1__concoct_161_sub 0.0257549
## lagoon_4__BBG5469_21S000393
## 11049_1__concoct_009 -4.2904015
## 11049_1__concoct_027_sub 4.8503565
## 11049_1__concoct_068 0.8802177
## 11049_1__concoct_084 -4.7699746
## 11049_1__concoct_129_sub -3.8085635
## 11049_1__concoct_161_sub -0.2460979
## mantis_1__BBG5538_22S000873
## 11049_1__concoct_009 -5.52400965
## 11049_1__concoct_027_sub 5.01221404
## 11049_1__concoct_068 1.43747595
## 11049_1__concoct_084 -3.78354347
## 11049_1__concoct_129_sub -3.29399525
## 11049_1__concoct_161_sub -0.05342106
## mantis_2__BBG5539_21S000058
## 11049_1__concoct_009 -5.7508481
## 11049_1__concoct_027_sub 0.8089162
## 11049_1__concoct_068 0.9059009
## 11049_1__concoct_084 -3.1193380
## 11049_1__concoct_129_sub -2.9437732
## 11049_1__concoct_161_sub 0.2863107
## mantis_3__BBG5540_21S000442
## 11049_1__concoct_009 -4.2096440
## 11049_1__concoct_027_sub 3.6598656
## 11049_1__concoct_068 0.8771607
## 11049_1__concoct_084 -3.4787565
## 11049_1__concoct_129_sub -3.1458178
## 11049_1__concoct_161_sub 0.1559014
## mantis_4__BBG5541_21S000443
## 11049_1__concoct_009 -5.8888147
## 11049_1__concoct_027_sub 3.4022758
## 11049_1__concoct_068 0.8568137
## 11049_1__concoct_084 -3.0171351
## 11049_1__concoct_129_sub -3.6552225
## 11049_1__concoct_161_sub 0.3937635
## mcSweeney_1__BBG5458_22S000870
## 11049_1__concoct_009 -0.5025488
## 11049_1__concoct_027_sub 4.4002476
## 11049_1__concoct_068 4.2114713
## 11049_1__concoct_084 0.9062853
## 11049_1__concoct_129_sub 2.2057352
## 11049_1__concoct_161_sub 0.8842465
## mcSweeney_2__BBG5459_21S000386
## 11049_1__concoct_009 -0.1938102
## 11049_1__concoct_027_sub 4.4438928
## 11049_1__concoct_068 3.9745406
## 11049_1__concoct_084 0.7396885
## 11049_1__concoct_129_sub 2.3522330
## 11049_1__concoct_161_sub 0.8081281
## mcSweeney_3__BBG5460_21S000055
## 11049_1__concoct_009 -0.7558493
## 11049_1__concoct_027_sub 3.1891906
## 11049_1__concoct_068 3.7228825
## 11049_1__concoct_084 -0.3236009
## 11049_1__concoct_129_sub 2.3669472
## 11049_1__concoct_161_sub 1.2721264
## mcSweeney_4__BBG5461_21S000387
## 11049_1__concoct_009 -0.4222790
## 11049_1__concoct_027_sub 3.5835740
## 11049_1__concoct_068 3.6944218
## 11049_1__concoct_084 0.6802937
## 11049_1__concoct_129_sub 2.1370584
## 11049_1__concoct_161_sub 0.9360145
## monsoon_1__BBG5474_21S000397
## 11049_1__concoct_009 0.6553794
## 11049_1__concoct_027_sub 3.8759839
## 11049_1__concoct_068 3.6787005
## 11049_1__concoct_084 0.6668389
## 11049_1__concoct_129_sub 2.6310975
## 11049_1__concoct_161_sub 1.0406188
## monsoon_2__BBG5475_21S000056
## 11049_1__concoct_009 0.8558594
## 11049_1__concoct_027_sub 5.3469434
## 11049_1__concoct_068 5.0717512
## 11049_1__concoct_084 2.1569714
## 11049_1__concoct_129_sub 1.9529711
## 11049_1__concoct_161_sub 1.0768312
## monsoon_3__BBG5476_22S000871
## 11049_1__concoct_009 0.8035741
## 11049_1__concoct_027_sub 4.5382756
## 11049_1__concoct_068 4.5045801
## 11049_1__concoct_084 1.5818124
## 11049_1__concoct_129_sub 1.9975626
## 11049_1__concoct_161_sub 1.3743526
## monsoon_4__BBG5477_21S000399
## 11049_1__concoct_009 0.2039197
## 11049_1__concoct_027_sub 5.4841818
## 11049_1__concoct_068 5.1036234
## 11049_1__concoct_084 2.4638963
## 11049_1__concoct_129_sub 1.7036180
## 11049_1__concoct_161_sub 1.1995614
## 13124_1__BBG5462_21S000388 13124_2__BBG5463_21S000389
## 11049_1__concoct_009 -0.8553297 -0.6419458
## 11049_1__concoct_027_sub 3.4578523 4.1975302
## 11049_1__concoct_068 1.8138944 1.6527494
## 11049_1__concoct_084 -3.3697122 -4.4382373
## 11049_1__concoct_129_sub -2.3419259 -2.1249949
## 11049_1__concoct_161_sub 3.8163157 3.6088799
## 13124_3__BBG5464_21S000390 13124_4__BBG5465_21S000061
## 11049_1__concoct_009 -1.272874 -0.7874982
## 11049_1__concoct_027_sub 3.292365 2.9948523
## 11049_1__concoct_068 1.274238 1.6702823
## 11049_1__concoct_084 -3.525390 -4.8264927
## 11049_1__concoct_129_sub -2.419055 -2.1184425
## 11049_1__concoct_161_sub 3.490427 3.7913670
## corbett_1__BBG5470_21S000394
## 11049_1__concoct_009 2.1624164
## 11049_1__concoct_027_sub 2.2965739
## 11049_1__concoct_068 -0.2355982
## 11049_1__concoct_084 -5.6186178
## 11049_1__concoct_129_sub -3.7860363
## 11049_1__concoct_161_sub 2.9064231
## corbett_2__BBG5471_21S000395
## 11049_1__concoct_009 2.2068148
## 11049_1__concoct_027_sub 2.1214228
## 11049_1__concoct_068 -0.2265411
## 11049_1__concoct_084 -5.6662753
## 11049_1__concoct_129_sub -3.8055230
## 11049_1__concoct_161_sub 2.8860276
## corbett_3__BBG5472_21S000396
## 11049_1__concoct_009 2.2700011
## 11049_1__concoct_027_sub 2.1061281
## 11049_1__concoct_068 -0.2227685
## 11049_1__concoct_084 -5.8566648
## 11049_1__concoct_129_sub -3.6410911
## 11049_1__concoct_161_sub 2.9469096
## corbett_4__BBG5473_21S000060
## 11049_1__concoct_009 2.3165793
## 11049_1__concoct_027_sub 2.1206636
## 11049_1__concoct_068 -0.1849683
## 11049_1__concoct_084 -5.8678317
## 11049_1__concoct_129_sub -3.4827777
## 11049_1__concoct_161_sub 2.9461467
## davie_1__BBG5454_21S000054 davie_3__BBG5456_21S000383
## 11049_1__concoct_009 3.062380 2.4210898
## 11049_1__concoct_027_sub 2.974682 1.4449672
## 11049_1__concoct_068 1.511398 0.8619122
## 11049_1__concoct_084 -4.664010 -3.5945795
## 11049_1__concoct_129_sub -4.307335 -3.9623042
## 11049_1__concoct_161_sub 1.655590 1.9420277
## davie_4__BBG5457_22S000869
## 11049_1__concoct_009 2.2778041
## 11049_1__concoct_027_sub 1.6710214
## 11049_1__concoct_068 0.5893236
## 11049_1__concoct_084 -5.1648350
## 11049_1__concoct_129_sub -4.6052193
## 11049_1__concoct_161_sub 1.8531190
## sanbank1_1__BBG5530_21S000062
## 11049_1__concoct_009 -3.2599449
## 11049_1__concoct_027_sub 3.5794350
## 11049_1__concoct_068 -0.6069338
## 11049_1__concoct_084 -4.3667877
## 11049_1__concoct_129_sub -5.8667413
## 11049_1__concoct_161_sub 2.7936924
## sanbank1_2__BBG5531_21S000438
## 11049_1__concoct_009 -2.6991173
## 11049_1__concoct_027_sub 2.9349611
## 11049_1__concoct_068 -0.7004593
## 11049_1__concoct_084 -3.7202421
## 11049_1__concoct_129_sub -6.1050653
## 11049_1__concoct_161_sub 2.7806326
## sanbank1_3__BBG5532_21S000439
## 11049_1__concoct_009 -3.248642
## 11049_1__concoct_027_sub 4.180362
## 11049_1__concoct_068 -1.041467
## 11049_1__concoct_084 -5.713746
## 11049_1__concoct_129_sub -6.601049
## 11049_1__concoct_161_sub 2.363171
## sanbank1_4__BBG5533_21S000440
## 11049_1__concoct_009 -2.906135
## 11049_1__concoct_027_sub 2.621534
## 11049_1__concoct_068 -1.151125
## 11049_1__concoct_084 -4.113550
## 11049_1__concoct_129_sub -6.382233
## 11049_1__concoct_161_sub 2.808380
## agincourt1_1__BBG6450_21S000073
## 11049_1__concoct_009 -3.76319583
## 11049_1__concoct_027_sub 3.16728347
## 11049_1__concoct_068 -2.29054959
## 11049_1__concoct_084 -5.64773703
## 11049_1__concoct_129_sub -4.78273959
## 11049_1__concoct_161_sub -0.05776113
## agincourt1_2__BBG6451_21S000475
## 11049_1__concoct_009 -3.4128917
## 11049_1__concoct_027_sub 3.9607882
## 11049_1__concoct_068 -1.8140359
## 11049_1__concoct_084 -5.1786757
## 11049_1__concoct_129_sub -5.3457298
## 11049_1__concoct_161_sub -0.1146211
## agincourt1_3__BBG6452_22S000879
## 11049_1__concoct_009 -2.9224450
## 11049_1__concoct_027_sub 4.4832338
## 11049_1__concoct_068 -0.7267437
## 11049_1__concoct_084 -5.1334629
## 11049_1__concoct_129_sub -5.0156799
## 11049_1__concoct_161_sub -0.5370812
## agincourt1_4__BBG6453_21S000477
## 11049_1__concoct_009 -2.6300972
## 11049_1__concoct_027_sub 4.5511359
## 11049_1__concoct_068 -1.2382005
## 11049_1__concoct_084 -4.6338269
## 11049_1__concoct_129_sub -7.1187335
## 11049_1__concoct_161_sub -0.4517767
## arlington_1__BBG6454_21S000478
## 11049_1__concoct_009 -2.3748778
## 11049_1__concoct_027_sub 2.6364086
## 11049_1__concoct_068 -3.9196165
## 11049_1__concoct_084 -4.6495776
## 11049_1__concoct_129_sub -4.4066315
## 11049_1__concoct_161_sub 0.1040125
## arlington_2__BBG6455_21S000479
## 11049_1__concoct_009 -1.232015
## 11049_1__concoct_027_sub 3.702501
## 11049_1__concoct_068 -3.109717
## 11049_1__concoct_084 -2.950087
## 11049_1__concoct_129_sub -4.170589
## 11049_1__concoct_161_sub 1.376973
## arlington_3__BBG6456_21S000480
## 11049_1__concoct_009 -0.7379217
## 11049_1__concoct_027_sub 4.3564217
## 11049_1__concoct_068 -2.6253564
## 11049_1__concoct_084 -2.5443873
## 11049_1__concoct_129_sub -4.0484647
## 11049_1__concoct_161_sub 1.2764946
## arlington_4__BBG6457_21S000074
## 11049_1__concoct_009 -0.2159744
## 11049_1__concoct_027_sub 3.7511182
## 11049_1__concoct_068 -2.9930175
## 11049_1__concoct_084 -2.8096143
## 11049_1__concoct_129_sub -4.0317694
## 11049_1__concoct_161_sub 1.5065035
## hastings_1__BBG6458_21S000481
## 11049_1__concoct_009 -2.2026675
## 11049_1__concoct_027_sub 3.8696712
## 11049_1__concoct_068 -2.9737762
## 11049_1__concoct_084 -6.5846941
## 11049_1__concoct_129_sub -4.8799460
## 11049_1__concoct_161_sub 0.8592621
## hastings_2__BBG6459_21S000482
## 11049_1__concoct_009 -2.5809328
## 11049_1__concoct_027_sub 3.1820978
## 11049_1__concoct_068 -3.0296266
## 11049_1__concoct_084 -6.5321765
## 11049_1__concoct_129_sub -4.5040283
## 11049_1__concoct_161_sub 0.6807078
## hastings_3__BBG6460_21S000483
## 11049_1__concoct_009 -1.919981
## 11049_1__concoct_027_sub 4.036361
## 11049_1__concoct_068 -2.691600
## 11049_1__concoct_084 -6.295738
## 11049_1__concoct_129_sub -4.314737
## 11049_1__concoct_161_sub 1.127382
## hastings_4__BBG6461_21S000484
## 11049_1__concoct_009 -2.2666045
## 11049_1__concoct_027_sub 3.6444670
## 11049_1__concoct_068 -2.6793424
## 11049_1__concoct_084 -5.8075638
## 11049_1__concoct_129_sub -4.8912731
## 11049_1__concoct_161_sub 0.8060441
## stcrispin_1__BBG5526_21S000434
## 11049_1__concoct_009 -5.7204513
## 11049_1__concoct_027_sub 4.8210407
## 11049_1__concoct_068 -1.0230058
## 11049_1__concoct_084 -3.1054915
## 11049_1__concoct_129_sub -4.7396220
## 11049_1__concoct_161_sub -0.1146492
## stcrispin_2__BBG5527_21S000435
## 11049_1__concoct_009 -3.3731208
## 11049_1__concoct_027_sub 4.7691025
## 11049_1__concoct_068 -1.5621035
## 11049_1__concoct_084 -5.2702408
## 11049_1__concoct_129_sub -4.5770936
## 11049_1__concoct_161_sub -0.7128613
## stcrispin_3__BBG5528_21S000436
## 11049_1__concoct_009 -5.55532077
## 11049_1__concoct_027_sub 5.03001600
## 11049_1__concoct_068 -0.92180611
## 11049_1__concoct_084 -3.16885419
## 11049_1__concoct_129_sub -5.23686704
## 11049_1__concoct_161_sub 0.03846081
## stcrispin_4__BBG5529_21S000437
## 11049_1__concoct_009 -4.5066029
## 11049_1__concoct_027_sub 4.6504229
## 11049_1__concoct_068 -0.9512549
## 11049_1__concoct_084 -3.1909262
## 11049_1__concoct_129_sub -4.2654409
## 11049_1__concoct_161_sub 0.2183684
## thetford_1__BBG6446_21S000471
## 11049_1__concoct_009 -0.4165723
## 11049_1__concoct_027_sub 2.8490724
## 11049_1__concoct_068 -2.7383273
## 11049_1__concoct_084 -4.7860201
## 11049_1__concoct_129_sub -4.6682371
## 11049_1__concoct_161_sub 0.6599087
## thetford_2__BBG6447_21S000472
## 11049_1__concoct_009 -0.02590532
## 11049_1__concoct_027_sub 3.99237478
## 11049_1__concoct_068 -2.31955005
## 11049_1__concoct_084 -3.60153156
## 11049_1__concoct_129_sub -4.62738450
## 11049_1__concoct_161_sub 0.62681598
## thetford_3__BBG6448_21S000473
## 11049_1__concoct_009 -0.01788948
## 11049_1__concoct_027_sub 3.20788024
## 11049_1__concoct_068 -2.20678068
## 11049_1__concoct_084 -4.19507005
## 11049_1__concoct_129_sub -4.69584534
## 11049_1__concoct_161_sub 0.86675752
## thetford_4__BBG6449_22S000878 boult_1__BBG5478
## 11049_1__concoct_009 -0.2763152 -3.1788441
## 11049_1__concoct_027_sub 3.8317119 1.3510636
## 11049_1__concoct_068 -2.5162166 -0.2830857
## 11049_1__concoct_084 -3.8098895 -3.5236846
## 11049_1__concoct_129_sub -4.2466071 -4.8905609
## 11049_1__concoct_161_sub 0.3448987 1.6900782
## boult_2__BBG5479 boult_3__BBG5480 boult_4__BBG5481
## 11049_1__concoct_009 -3.552563 -2.512553 -1.862364
## 11049_1__concoct_027_sub 3.651171 1.108121 3.390653
## 11049_1__concoct_068 1.641475 0.150035 1.492790
## 11049_1__concoct_084 -2.112544 -2.918018 -2.195264
## 11049_1__concoct_129_sub -4.951930 -6.444378 -4.559943
## 11049_1__concoct_161_sub 1.467722 1.896793 1.722616
## broomfield_1__BBG5502_21S000066
## 11049_1__concoct_009 -5.0595544
## 11049_1__concoct_027_sub 3.3208060
## 11049_1__concoct_068 2.9430200
## 11049_1__concoct_084 -0.5370951
## 11049_1__concoct_129_sub -4.3007120
## 11049_1__concoct_161_sub 2.7566386
## broomfield_2__BBG5503_22S000872
## 11049_1__concoct_009 -3.415576
## 11049_1__concoct_027_sub 1.907434
## 11049_1__concoct_068 1.355108
## 11049_1__concoct_084 -2.904751
## 11049_1__concoct_129_sub -3.127894
## 11049_1__concoct_161_sub 1.152238
## broomfield_3__BBG5504_21S000417
## 11049_1__concoct_009 -6.261119
## 11049_1__concoct_027_sub 1.421248
## 11049_1__concoct_068 1.381165
## 11049_1__concoct_084 -1.663477
## 11049_1__concoct_129_sub -4.469360
## 11049_1__concoct_161_sub 2.303101
## broomfield_4__BBG5505_21S000418
## 11049_1__concoct_009 -6.505017
## 11049_1__concoct_027_sub 3.457517
## 11049_1__concoct_068 2.464715
## 11049_1__concoct_084 -0.257942
## 11049_1__concoct_129_sub -4.425576
## 11049_1__concoct_161_sub 2.014573
## erskine_1__BBG5514_21S000425
## 11049_1__concoct_009 -4.9509647
## 11049_1__concoct_027_sub 0.0151869
## 11049_1__concoct_068 2.9012288
## 11049_1__concoct_084 -2.2822756
## 11049_1__concoct_129_sub -4.6373072
## 11049_1__concoct_161_sub 2.1224388
## erskine_2__BBG5515_21S000426
## 11049_1__concoct_009 -5.1354324
## 11049_1__concoct_027_sub 0.2864844
## 11049_1__concoct_068 2.9844870
## 11049_1__concoct_084 -2.1028862
## 11049_1__concoct_129_sub -4.4740339
## 11049_1__concoct_161_sub 2.0235192
## erskine_3__BBG5516_21S000427
## 11049_1__concoct_009 -5.0847685
## 11049_1__concoct_027_sub 0.2214587
## 11049_1__concoct_068 2.8206298
## 11049_1__concoct_084 -2.1280217
## 11049_1__concoct_129_sub -5.0847685
## 11049_1__concoct_161_sub 2.2539285
## erskine_4__BBG5517_21S000065
## 11049_1__concoct_009 -5.587851
## 11049_1__concoct_027_sub 0.661363
## 11049_1__concoct_068 3.105627
## 11049_1__concoct_084 -2.372011
## 11049_1__concoct_129_sub -4.474201
## 11049_1__concoct_161_sub 2.182570
## fairfax_1__BBG5486_21S000403
## 11049_1__concoct_009 -2.4100176
## 11049_1__concoct_027_sub 1.5585924
## 11049_1__concoct_068 0.7295171
## 11049_1__concoct_084 -3.6289465
## 11049_1__concoct_129_sub -5.3544565
## 11049_1__concoct_161_sub 2.1215802
## fairfax_2__BBG5487_21S000404
## 11049_1__concoct_009 -2.885069
## 11049_1__concoct_027_sub 3.576362
## 11049_1__concoct_068 1.253976
## 11049_1__concoct_084 -2.959177
## 11049_1__concoct_129_sub -4.864881
## 11049_1__concoct_161_sub 1.629238
## fairfax_3__BBG5488_21S000405
## 11049_1__concoct_009 -2.586107
## 11049_1__concoct_027_sub 4.040121
## 11049_1__concoct_068 1.626064
## 11049_1__concoct_084 -2.217937
## 11049_1__concoct_129_sub -4.372102
## 11049_1__concoct_161_sub 1.447264
## fairfax_4__BBG5489_21S000406
## 11049_1__concoct_009 -3.826443
## 11049_1__concoct_027_sub 3.755048
## 11049_1__concoct_068 0.801106
## 11049_1__concoct_084 -2.810069
## 11049_1__concoct_129_sub -4.979123
## 11049_1__concoct_161_sub 1.501560
## hoskyn_1__BBG5482_21S000400
## 11049_1__concoct_009 -4.418132
## 11049_1__concoct_027_sub 1.315816
## 11049_1__concoct_068 1.244612
## 11049_1__concoct_084 -2.241961
## 11049_1__concoct_129_sub -5.398961
## 11049_1__concoct_161_sub 1.920517
## hoskyn_2__BBG5483_21S000063
## 11049_1__concoct_009 -4.7476191
## 11049_1__concoct_027_sub 0.9485714
## 11049_1__concoct_068 1.3344266
## 11049_1__concoct_084 -1.8229755
## 11049_1__concoct_129_sub -5.2134934
## 11049_1__concoct_161_sub 2.2041728
## hoskyn_3__BBG5484_21S000401
## 11049_1__concoct_009 -4.0232455
## 11049_1__concoct_027_sub 3.8149736
## 11049_1__concoct_068 2.3738693
## 11049_1__concoct_084 -0.9432061
## 11049_1__concoct_129_sub -4.6800251
## 11049_1__concoct_161_sub 1.6561684
## hoskyn_4__BBG5485_21S000402
## 11049_1__concoct_009 -4.026879
## 11049_1__concoct_027_sub 1.007002
## 11049_1__concoct_068 1.239690
## 11049_1__concoct_084 -2.277679
## 11049_1__concoct_129_sub -5.045449
## 11049_1__concoct_161_sub 2.147948
## masthead_1__BBG5506_21S000419
## 11049_1__concoct_009 -4.8317524
## 11049_1__concoct_027_sub 0.3957847
## 11049_1__concoct_068 2.5608130
## 11049_1__concoct_084 -2.0103736
## 11049_1__concoct_129_sub -4.1058154
## 11049_1__concoct_161_sub 1.4711107
## masthead_2__BBG5507_21S000420
## 11049_1__concoct_009 -4.7625910
## 11049_1__concoct_027_sub -0.2604032
## 11049_1__concoct_068 2.5994649
## 11049_1__concoct_084 -2.2896605
## 11049_1__concoct_129_sub -4.2235945
## 11049_1__concoct_161_sub 1.9720856
## masthead_3__BBG5508_21S000421
## 11049_1__concoct_009 -4.3582787
## 11049_1__concoct_027_sub -0.1879129
## 11049_1__concoct_068 2.5129028
## 11049_1__concoct_084 -2.1756529
## 11049_1__concoct_129_sub -4.6034012
## 11049_1__concoct_161_sub 2.0871650
## masthead_4__BBG5509_21S000064
## 11049_1__concoct_009 -4.5814098
## 11049_1__concoct_027_sub -0.2741701
## 11049_1__concoct_068 2.8736233
## 11049_1__concoct_084 -1.8095567
## 11049_1__concoct_129_sub -4.4807663
## 11049_1__concoct_161_sub 2.2832292
## north_1__BBG5490_21S000072 north_2__BBG5491_21S000407
## 11049_1__concoct_009 -5.5166782 -5.3877846
## 11049_1__concoct_027_sub 0.8858729 0.9932791
## 11049_1__concoct_068 0.6831745 0.8713718
## 11049_1__concoct_084 -2.5941493 -3.2283004
## 11049_1__concoct_129_sub -4.4180659 -3.4578748
## 11049_1__concoct_161_sub 2.6259242 2.7265401
## north_3__BBG5492_21S000408 north_4__BBG5493_21S000409
## 11049_1__concoct_009 -5.783156 -5.4517141
## 11049_1__concoct_027_sub 2.372042 1.4374952
## 11049_1__concoct_068 1.056779 0.9470888
## 11049_1__concoct_084 -2.002218 -2.8677166
## 11049_1__concoct_129_sub -3.411578 -4.1989512
## 11049_1__concoct_161_sub 2.804656 2.8216544
## centipede_1__BBG6474 centipede_2__BBG6475
## 11049_1__concoct_009 -6.9089596 -6.4552769
## 11049_1__concoct_027_sub 4.3917437 6.3870630
## 11049_1__concoct_068 -2.1008485 -0.6088381
## 11049_1__concoct_084 -4.0467587 -3.4595446
## 11049_1__concoct_129_sub -3.6318148 -5.0689825
## 11049_1__concoct_161_sub -0.9262835 -1.6349953
## centipede_3__BBG6476 centipede_4__BBG6477
## 11049_1__concoct_009 -7.507232 -6.8592760
## 11049_1__concoct_027_sub 5.657658 5.7964349
## 11049_1__concoct_068 -1.431120 -0.9621221
## 11049_1__concoct_084 -4.275111 -2.9880750
## 11049_1__concoct_129_sub -4.416190 -4.0866873
## 11049_1__concoct_161_sub -1.292624 -1.5265572
## chicken_1__BBG6486_22S000883
## 11049_1__concoct_009 -6.408612
## 11049_1__concoct_027_sub 3.897217
## 11049_1__concoct_068 -3.324084
## 11049_1__concoct_084 -4.260178
## 11049_1__concoct_129_sub -2.825093
## 11049_1__concoct_161_sub -1.792078
## chicken_2__BBG6487_22S000884
## 11049_1__concoct_009 -5.408766
## 11049_1__concoct_027_sub 5.866088
## 11049_1__concoct_068 -1.996519
## 11049_1__concoct_084 -3.249282
## 11049_1__concoct_129_sub -3.799328
## 11049_1__concoct_161_sub -2.317724
## chicken_3__BBG6488_21S000078
## 11049_1__concoct_009 -5.997466
## 11049_1__concoct_027_sub 4.663995
## 11049_1__concoct_068 -2.884370
## 11049_1__concoct_084 -3.869684
## 11049_1__concoct_129_sub -3.001734
## 11049_1__concoct_161_sub -1.583183
## chicken_4__BBG6489_21S000501
## 11049_1__concoct_009 -5.614647
## 11049_1__concoct_027_sub 3.735021
## 11049_1__concoct_068 -3.157911
## 11049_1__concoct_084 -4.444575
## 11049_1__concoct_129_sub -3.032748
## 11049_1__concoct_161_sub -1.644355
## foreAndAft_1__BBG6494_21S000506
## 11049_1__concoct_009 -5.197126
## 11049_1__concoct_027_sub 5.527847
## 11049_1__concoct_068 -1.237818
## 11049_1__concoct_084 -4.530647
## 11049_1__concoct_129_sub -3.967178
## 11049_1__concoct_161_sub -1.350344
## foreAndAft_2__BBG6495_21S000507
## 11049_1__concoct_009 -5.047776
## 11049_1__concoct_027_sub 5.701018
## 11049_1__concoct_068 -1.155955
## 11049_1__concoct_084 -4.865454
## 11049_1__concoct_129_sub -3.884625
## 11049_1__concoct_161_sub -1.066227
## foreAndAft_3__BBG6496_21S000508
## 11049_1__concoct_009 -5.3712812
## 11049_1__concoct_027_sub 4.7154766
## 11049_1__concoct_068 -1.7307753
## 11049_1__concoct_084 -4.5239833
## 11049_1__concoct_129_sub -3.9049441
## 11049_1__concoct_161_sub -0.8422724
## foreAndAft_4__BBG6497_21S000509
## 11049_1__concoct_009 -5.3614746
## 11049_1__concoct_027_sub 4.5877721
## 11049_1__concoct_068 -1.8124959
## 11049_1__concoct_084 -5.2561140
## 11049_1__concoct_129_sub -4.3806453
## 11049_1__concoct_161_sub -0.9547553
## fork_1__BBG6466_21S000489 fork_2__BBG6467_21S000490
## 11049_1__concoct_009 -6.028429 -6.260061
## 11049_1__concoct_027_sub 3.347906 3.233502
## 11049_1__concoct_068 -3.498266 -3.585912
## 11049_1__concoct_084 -4.020961 -3.908685
## 11049_1__concoct_129_sub -2.761822 -3.001964
## 11049_1__concoct_161_sub -1.526154 -1.458091
## fork_3__BBG6468_21S000491 fork_4__BBG6469_21S000492
## 11049_1__concoct_009 -5.743402 -6.056514
## 11049_1__concoct_027_sub 3.314423 3.375288
## 11049_1__concoct_068 -3.619509 -3.561245
## 11049_1__concoct_084 -4.270096 -4.307314
## 11049_1__concoct_129_sub -2.818287 -2.878460
## 11049_1__concoct_161_sub -1.519892 -1.457613
## grub_1__BBG6482_21S000496 grub_2__BBG6483_21S000497
## 11049_1__concoct_009 -5.574611 -5.899169
## 11049_1__concoct_027_sub 3.860115 3.299470
## 11049_1__concoct_068 -3.162827 -3.027489
## 11049_1__concoct_084 -4.310919 -4.289731
## 11049_1__concoct_129_sub -3.102608 -3.018099
## 11049_1__concoct_161_sub -1.785191 -1.638839
## grub_3__BBG6484_21S000079 grub_4__BBG6485_22S000882
## 11049_1__concoct_009 -6.043031 -6.444599
## 11049_1__concoct_027_sub 3.429672 3.708284
## 11049_1__concoct_068 -3.069771 -3.036757
## 11049_1__concoct_084 -4.602669 -4.365158
## 11049_1__concoct_129_sub -3.048688 -2.812290
## 11049_1__concoct_161_sub -1.600706 -1.680718
## helix_1__BBG6478 helix_2__BBG6479 helix_3__BBG6480
## 11049_1__concoct_009 -5.626461 -6.115163 -6.114589
## 11049_1__concoct_027_sub 3.024038 2.496567 4.037262
## 11049_1__concoct_068 -3.061512 -2.730773 -1.409573
## 11049_1__concoct_084 -4.710170 -3.787885 -2.296877
## 11049_1__concoct_129_sub -2.448407 -1.984005 -2.875910
## 11049_1__concoct_161_sub -1.414333 -1.003175 -1.451150
## helix_4__BBG6481 johnBrewer_1__BBG6510_21S000521
## 11049_1__concoct_009 -7.2886206 -7.170177
## 11049_1__concoct_027_sub 2.4179240 3.974492
## 11049_1__concoct_068 -3.0294681 -1.476445
## 11049_1__concoct_084 -3.5873187 -5.224267
## 11049_1__concoct_129_sub -2.4134233 -4.605228
## 11049_1__concoct_161_sub -0.9741672 -1.041127
## johnBrewer_2__BBG6511_21S000522
## 11049_1__concoct_009 -6.3029365
## 11049_1__concoct_027_sub 4.9153360
## 11049_1__concoct_068 -0.7903145
## 11049_1__concoct_084 -3.7227197
## 11049_1__concoct_129_sub -4.6542779
## 11049_1__concoct_161_sub -1.2227751
## johnBrewer_3__BBG6512_22S000887
## 11049_1__concoct_009 -6.0067349
## 11049_1__concoct_027_sub 4.1862549
## 11049_1__concoct_068 -1.2171620
## 11049_1__concoct_084 -4.0971924
## 11049_1__concoct_129_sub -4.8280799
## 11049_1__concoct_161_sub -0.7823332
## johnBrewer_4__BBG6513_21S000524
## 11049_1__concoct_009 -6.9213761
## 11049_1__concoct_027_sub 3.8369947
## 11049_1__concoct_068 -1.5647899
## 11049_1__concoct_084 -4.6187910
## 11049_1__concoct_129_sub -5.3119382
## 11049_1__concoct_161_sub -0.9274147
## kelso_1__BBG6502_21S000514 kelso_2__BBG6503_21S000515
## 11049_1__concoct_009 -5.251685 -5.352297
## 11049_1__concoct_027_sub 4.884708 4.536768
## 11049_1__concoct_068 -1.717998 -2.195297
## 11049_1__concoct_084 -4.181243 -4.716308
## 11049_1__concoct_129_sub -3.676148 -3.617696
## 11049_1__concoct_161_sub -1.233801 -1.170247
## kelso_3__BBG6504_21S000516 kelso_4__BBG6505_21S000076
## 11049_1__concoct_009 -4.719254 -5.346904
## 11049_1__concoct_027_sub 4.712165 5.149558
## 11049_1__concoct_068 -2.037179 -1.634959
## 11049_1__concoct_084 -4.645146 -4.521268
## 11049_1__concoct_129_sub -3.603692 -3.927820
## 11049_1__concoct_161_sub -1.158062 -1.158513
## knife_1__BBG6462_22S000880 knife_2__BBG6463_22S000881
## 11049_1__concoct_009 -5.820239 -5.313119
## 11049_1__concoct_027_sub 3.449077 3.420757
## 11049_1__concoct_068 -3.367081 -3.585898
## 11049_1__concoct_084 -4.373320 -4.115415
## 11049_1__concoct_129_sub -2.642185 -2.892750
## 11049_1__concoct_161_sub -1.589763 -1.581419
## knife_3__BBG6464_21S000487 knife_4__BBG6465_21S000488
## 11049_1__concoct_009 -7.195504 -7.279832
## 11049_1__concoct_027_sub 3.710341 3.499645
## 11049_1__concoct_068 -3.345357 -3.367809
## 11049_1__concoct_084 -4.225090 -4.640775
## 11049_1__concoct_129_sub -2.712502 -3.045726
## 11049_1__concoct_161_sub -1.615775 -1.442102
## littleKelso_1__BBG6498_22S000885
## 11049_1__concoct_009 -6.297769
## 11049_1__concoct_027_sub 5.297372
## 11049_1__concoct_068 -1.042881
## 11049_1__concoct_084 -4.218327
## 11049_1__concoct_129_sub -4.425967
## 11049_1__concoct_161_sub -1.531330
## littleKelso_2__BBG6499_21S000511
## 11049_1__concoct_009 -5.008529
## 11049_1__concoct_027_sub 5.205202
## 11049_1__concoct_068 -1.296584
## 11049_1__concoct_084 -4.720847
## 11049_1__concoct_129_sub -3.526924
## 11049_1__concoct_161_sub -1.035724
## littleKelso_3__BBG6500_21S000512
## 11049_1__concoct_009 -4.836794
## 11049_1__concoct_027_sub 4.733223
## 11049_1__concoct_068 -1.499839
## 11049_1__concoct_084 -4.325969
## 11049_1__concoct_129_sub -3.613019
## 11049_1__concoct_161_sub -1.034586
## littleKelso_4__BBG6501_22S000886
## 11049_1__concoct_009 -4.9803174
## 11049_1__concoct_027_sub 4.3959187
## 11049_1__concoct_068 -1.7465529
## 11049_1__concoct_084 -4.2265456
## 11049_1__concoct_129_sub -4.0312368
## 11049_1__concoct_161_sub -0.9321622
## lynchs_1__BBG6514_21S000525
## 11049_1__concoct_009 -6.503589
## 11049_1__concoct_027_sub 4.032791
## 11049_1__concoct_068 -2.269483
## 11049_1__concoct_084 -3.459067
## 11049_1__concoct_129_sub -4.424148
## 11049_1__concoct_161_sub -1.492954
## lynchs_2__BBG6515_22S000888
## 11049_1__concoct_009 -6.199068
## 11049_1__concoct_027_sub 5.037838
## 11049_1__concoct_068 -1.905872
## 11049_1__concoct_084 -3.748062
## 11049_1__concoct_129_sub -4.673011
## 11049_1__concoct_161_sub -1.515086
## lynchs_3__BBG6516_21S000527
## 11049_1__concoct_009 -7.241817
## 11049_1__concoct_027_sub 5.755158
## 11049_1__concoct_068 -1.407006
## 11049_1__concoct_084 -3.807830
## 11049_1__concoct_129_sub -5.450058
## 11049_1__concoct_161_sub -1.953550
## lynchs_4__BBG6517_21S000528
## 11049_1__concoct_009 -6.141683
## 11049_1__concoct_027_sub 4.278035
## 11049_1__concoct_068 -2.014549
## 11049_1__concoct_084 -3.944459
## 11049_1__concoct_129_sub -3.502626
## 11049_1__concoct_161_sub -1.405485
## myrmidon_1__BBG6470_21S000493
## 11049_1__concoct_009 -6.2201597
## 11049_1__concoct_027_sub -0.8040593
## 11049_1__concoct_068 -3.7352531
## 11049_1__concoct_084 -3.8222645
## 11049_1__concoct_129_sub -1.8134405
## 11049_1__concoct_161_sub -0.9369560
## myrmidon_2__BBG6471_21S000494
## 11049_1__concoct_009 -7.2302079
## 11049_1__concoct_027_sub -0.6128049
## 11049_1__concoct_068 -3.9343710
## 11049_1__concoct_084 -4.4576191
## 11049_1__concoct_129_sub -2.3175530
## 11049_1__concoct_161_sub -0.8382907
## myrmidon_3__BBG6472_21S000077
## 11049_1__concoct_009 -7.1710099
## 11049_1__concoct_027_sub -0.5704594
## 11049_1__concoct_068 -4.0140095
## 11049_1__concoct_084 -4.0574946
## 11049_1__concoct_129_sub -1.9615238
## 11049_1__concoct_161_sub -0.8262511
## myrmidon_4__BBG6473_21S000495 rib_1__BBG6490_21S000502
## 11049_1__concoct_009 -5.5566682 -5.3012243
## 11049_1__concoct_027_sub -0.3918822 5.0389545
## 11049_1__concoct_068 -4.7093703 -0.8647301
## 11049_1__concoct_084 -3.9472303 -4.4854748
## 11049_1__concoct_129_sub -1.8677888 -4.5865710
## 11049_1__concoct_161_sub -0.7553831 -0.6098765
## rib_2__BBG6491_21S000503 rib_3__BBG6492_21S000504
## 11049_1__concoct_009 -4.8239016 -5.9172183
## 11049_1__concoct_027_sub 4.9450912 5.6382781
## 11049_1__concoct_068 -0.9199108 -0.5327232
## 11049_1__concoct_084 -5.3347272 -5.2240711
## 11049_1__concoct_129_sub -4.6007581 -4.1944517
## 11049_1__concoct_161_sub -0.7345696 -0.9336116
## rib_4__BBG6493_21S000505 roxburgh_1__BBG6506_21S000517
## 11049_1__concoct_009 -4.5718556 -4.702599
## 11049_1__concoct_027_sub 4.9865964 5.143267
## 11049_1__concoct_068 -1.0000538 -1.225564
## 11049_1__concoct_084 -4.5401069 -4.849203
## 11049_1__concoct_129_sub -4.1346418 -3.901821
## 11049_1__concoct_161_sub -0.6715135 -1.109030
## roxburgh_2__BBG6507_21S000518
## 11049_1__concoct_009 -4.883735
## 11049_1__concoct_027_sub 5.027373
## 11049_1__concoct_068 -1.363789
## 11049_1__concoct_084 -4.761133
## 11049_1__concoct_129_sub -4.048938
## 11049_1__concoct_161_sub -1.132698
## roxburgh_3__BBG6508_21S000519
## 11049_1__concoct_009 -4.962132
## 11049_1__concoct_027_sub 4.116002
## 11049_1__concoct_068 -1.389786
## 11049_1__concoct_084 -4.045841
## 11049_1__concoct_129_sub -4.129223
## 11049_1__concoct_161_sub -1.082632
## roxburgh_4__BBG6509_21S000520
## 11049_1__concoct_009 -5.016228
## 11049_1__concoct_027_sub 5.257095
## 11049_1__concoct_068 -1.029893
## 11049_1__concoct_084 -4.099938
## 11049_1__concoct_129_sub -4.659553
## 11049_1__concoct_161_sub -1.178313
## farquaharson_1__BBG6418_21S000448
## 11049_1__concoct_009 -1.5090073
## 11049_1__concoct_027_sub 4.4148569
## 11049_1__concoct_068 -1.6705518
## 11049_1__concoct_084 -5.0027563
## 11049_1__concoct_129_sub -3.3752999
## 11049_1__concoct_161_sub 0.6232284
## farquaharson_2__BBG6419_22S000874
## 11049_1__concoct_009 -1.6930765
## 11049_1__concoct_027_sub 4.1126189
## 11049_1__concoct_068 -1.7361757
## 11049_1__concoct_084 -4.2580259
## 11049_1__concoct_129_sub -2.8717315
## 11049_1__concoct_161_sub 0.7986308
## farquaharson_3__BBG6420_21S000450
## 11049_1__concoct_009 -1.4477020
## 11049_1__concoct_027_sub 4.2177010
## 11049_1__concoct_068 -1.2798816
## 11049_1__concoct_084 -4.3215989
## 11049_1__concoct_129_sub -3.5438944
## 11049_1__concoct_161_sub 0.4829041
## farquaharson_4__BBG6421_22S000875 feather_1__BBG6422
## 11049_1__concoct_009 -1.6148564 0.3190216
## 11049_1__concoct_027_sub 3.6138539 3.0635798
## 11049_1__concoct_068 -1.5448125 -2.3377353
## 11049_1__concoct_084 -4.8088088 -5.8492807
## 11049_1__concoct_129_sub -2.4611032 -4.3733742
## 11049_1__concoct_161_sub 0.9522746 0.7504197
## feather_2__BBG6423 feather_3__BBG6424
## 11049_1__concoct_009 0.1213511 -0.6167127
## 11049_1__concoct_027_sub 3.1194361 2.7660431
## 11049_1__concoct_068 -2.6263217 -2.4006901
## 11049_1__concoct_084 -5.2412815 -5.0632779
## 11049_1__concoct_129_sub -3.7319271 -3.7051544
## 11049_1__concoct_161_sub 0.8104715 0.9189777
## feather_4__BBG6425 hedley_1__BBG6438_21S000463
## 11049_1__concoct_009 0.1090817 2.594059
## 11049_1__concoct_027_sub 2.6582062 1.901518
## 11049_1__concoct_068 -2.7165903 -1.911089
## 11049_1__concoct_084 -5.7879600 -5.624661
## 11049_1__concoct_129_sub -3.8275778 -4.613060
## 11049_1__concoct_161_sub 0.7325409 1.854639
## hedley_3__BBG6439_21S000464
## 11049_1__concoct_009 2.8342211
## 11049_1__concoct_027_sub 2.6742251
## 11049_1__concoct_068 -2.1230465
## 11049_1__concoct_084 -5.3920721
## 11049_1__concoct_129_sub -4.2777115
## 11049_1__concoct_161_sub 0.5291232
## hedley_4__BBG6440_21S000465
## 11049_1__concoct_009 2.373548
## 11049_1__concoct_027_sub 2.694616
## 11049_1__concoct_068 -1.899532
## 11049_1__concoct_084 -5.375631
## 11049_1__concoct_129_sub -4.682483
## 11049_1__concoct_161_sub 2.264706
## mcCulloch_1__BBG6434_21S000459
## 11049_1__concoct_009 2.019779
## 11049_1__concoct_027_sub 2.456426
## 11049_1__concoct_068 -1.976125
## 11049_1__concoct_084 -4.835725
## 11049_1__concoct_129_sub -4.098126
## 11049_1__concoct_161_sub 2.504344
## mcCulloch_2__BBG6435_21S000460
## 11049_1__concoct_009 2.163404
## 11049_1__concoct_027_sub 3.036309
## 11049_1__concoct_068 -2.100115
## 11049_1__concoct_084 -5.161805
## 11049_1__concoct_129_sub -4.263864
## 11049_1__concoct_161_sub 2.056771
## mcCulloch_3__BBG6436_21S000461
## 11049_1__concoct_009 1.813805
## 11049_1__concoct_027_sub 4.800722
## 11049_1__concoct_068 -1.358892
## 11049_1__concoct_084 -5.582070
## 11049_1__concoct_129_sub -4.077992
## 11049_1__concoct_161_sub 1.390771
## mcCulloch_4__BBG6437_21S000462
## 11049_1__concoct_009 2.291414
## 11049_1__concoct_027_sub 2.689377
## 11049_1__concoct_068 -2.101229
## 11049_1__concoct_084 -4.844997
## 11049_1__concoct_129_sub -4.024016
## 11049_1__concoct_161_sub 2.416930
## moore_1__BBG6442_22S000877 moore_2__BBG6443_21S000468
## 11049_1__concoct_009 0.06895707 0.4757257
## 11049_1__concoct_027_sub 1.50729345 2.3291147
## 11049_1__concoct_068 -2.08620265 -2.1613698
## 11049_1__concoct_084 -5.10662754 -5.5564881
## 11049_1__concoct_129_sub -4.92430598 -4.4578758
## 11049_1__concoct_161_sub 0.88082899 0.8210116
## moore_3__BBG6444_21S000469 moore_4__BBG6445_21S000470
## 11049_1__concoct_009 0.3481784 0.5932615
## 11049_1__concoct_027_sub 2.5241204 2.3941262
## 11049_1__concoct_068 -2.5518468 -2.4688657
## 11049_1__concoct_084 -4.7490714 -4.7447825
## 11049_1__concoct_129_sub -4.1692529 -4.5537273
## 11049_1__concoct_161_sub 0.9492199 0.9344915
## Peart_1__BBG6430_21S000455 Peart_2__BBG6431_21S000456
## 11049_1__concoct_009 1.984284 1.909855
## 11049_1__concoct_027_sub 3.100242 2.788601
## 11049_1__concoct_068 -2.243349 -2.263342
## 11049_1__concoct_084 -6.470183 -5.670080
## 11049_1__concoct_129_sub -3.831125 -3.996103
## 11049_1__concoct_161_sub 2.463745 2.467755
## Peart_3__BBG6432_21S000457 Peart_4__BBG6433_21S000458
## 11049_1__concoct_009 2.291990 1.883065
## 11049_1__concoct_027_sub 3.046085 2.891615
## 11049_1__concoct_068 -2.023893 -2.090912
## 11049_1__concoct_084 -6.393341 -5.919553
## 11049_1__concoct_129_sub -3.828392 -4.310115
## 11049_1__concoct_161_sub 2.392810 2.569909
## taylor_1__BBG6426_22S000876
## 11049_1__concoct_009 -1.942922
## 11049_1__concoct_027_sub 2.903325
## 11049_1__concoct_068 -1.701057
## 11049_1__concoct_084 -5.228964
## 11049_1__concoct_129_sub -3.196669
## 11049_1__concoct_161_sub 0.507778
## taylor_2__BBG6427_21S000453
## 11049_1__concoct_009 -1.9394992
## 11049_1__concoct_027_sub 3.2053383
## 11049_1__concoct_068 -2.0339377
## 11049_1__concoct_084 -5.7006993
## 11049_1__concoct_129_sub -3.5904861
## 11049_1__concoct_161_sub 0.2935738
## taylor_3__BBG6428_21S000454
## 11049_1__concoct_009 -1.6617500
## 11049_1__concoct_027_sub 2.8833861
## 11049_1__concoct_068 -2.0004176
## 11049_1__concoct_084 -5.5870182
## 11049_1__concoct_129_sub -3.4275340
## 11049_1__concoct_161_sub 0.3991546
## taylor_4__BBG6429_21S000075
## 11049_1__concoct_009 -1.8730318
## 11049_1__concoct_027_sub 3.5122215
## 11049_1__concoct_068 -1.5448398
## 11049_1__concoct_084 -5.1373882
## 11049_1__concoct_129_sub -3.3873014
## 11049_1__concoct_161_sub 0.3713389
### IMPORTANT: pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains is the object to
### use for MINT (and only for MINT)!!
# Swains gets its own CLR transform (within Swains only); the resulting object
# pMAGs_95ANI_phyloseq_clr_Swains_only is the one to use for PCA and, later on,
# to validate the model
pMAGs_95ANI_phyloseq_clr_Swains_only <- microbiome::transform(
  pMAGs_95ANI_phyloseq_Swains_only,
  transform = "clr"
)
# Pull the abundance table out of the phyloseq object and transpose it in one
# step; t() on a data frame returns a plain matrix (see the class() check below)
MAGs_no_Swains <- t(
  as.data.frame(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@otu_table)
)
# Quick sanity checks on the shape and type of the predictor matrix
dim(MAGs_no_Swains)
## [1] 170 876
class(MAGs_no_Swains)
## [1] "matrix" "array"
# MINT sPLS-DA without Swains: open/closed to fishing is the outcome and the
# sector is the "study" grouping; 10 components are fitted so that perf() can
# show how the error rate changes with the number of components
mint.splsda.open.closed_no_Swains <- mint.splsda(
  X = MAGs_no_Swains,
  Y = pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$SECTOR_N_S,
  ncomp = 10
)
# Performance assessment of the fitted model, then the error-rate plot
perf.mint.splsda.open.closed_no_Swains <- perf(mint.splsda.open.closed_no_Swains)
plot(perf.mint.splsda.open.closed_no_Swains)
Choosing the number of components in mint.splsda using perf() with leave-one-group-out cross-validation (LOGOCV) to discriminate between reefs that are open or closed to fishing. Classification error rates (overall and balanced, BER) are shown on the y-axis against the number of components on the x-axis, for each prediction distance. Overall and balanced error rates show the same trend because the design is balanced (i.e. the same number of protected and non-protected reefs in each sampling trip). The error rate reaches its minimum at three components with the centroids prediction distance, so we retained three components in downstream analyses.
Let’s see the overlap of selected features between MINT sPLS-DA models when study = Sampling_trip vs Sector.
# Sanity check: are similar taxa selected by MINT sPLS-DA whether the "study"
# grouping is the sampling TRIP or the SECTOR?
# Both models use the same predictors (MAGs) and the same open/closed outcome,
# keeping 100 features on component 1 and 10 on component 2.
mint.splsda.open.closed.trip.100 <- mint.splsda(
  X = MAGs,
  Y = pMAGs_95ANI_phyloseq_clr@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_clr@sam_data$Sampling_trip,
  keepX = c(100, 10),
  ncomp = 2
)
mint.splsda.open.closed.sector.100 <- mint.splsda(
  X = MAGs,
  Y = pMAGs_95ANI_phyloseq_clr@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_clr@sam_data$SECTOR_N_S,
  keepX = c(100, 10),
  ncomp = 2
)
# Extracting the indicators selected on component 1 (keepX = 100) of each model
mint.splsda.open.closed.trip.100.vars <- selectVar(mint.splsda.open.closed.trip.100, comp = 1)$name
mint.splsda.open.closed.sector.100.vars <- selectVar(mint.splsda.open.closed.sector.100, comp = 1)$name
### Ready for a Venn diagram now:
# Find the intersection (shared features) between the two
shared_features <- intersect(
  mint.splsda.open.closed.trip.100.vars,
  mint.splsda.open.closed.sector.100.vars
)
# Create the Venn diagram as a grid object (filename = NULL: nothing is saved
# as an image, the drawing is returned instead). The list names double as the
# category labels, which is the default for category.names. The former
# `output = TRUE` was dropped: it is not a venn.diagram() argument and was
# silently ignored.
# NOTE(review): venn.diagram() may still drop a VennDiagram*.log file in the
# working directory; newer VennDiagram versions accept disable.logging = TRUE -
# confirm the installed version before adding it.
venn.plot <- venn.diagram(
  x = list(
    "Trip" = mint.splsda.open.closed.trip.100.vars,
    "Sector" = mint.splsda.open.closed.sector.100.vars
  ),
  filename = NULL
)
# Plot the Venn diagram on a fresh page, so it is never drawn on top of
# whatever is already on the current graphics device
grid.newpage()
grid.draw(venn.plot)
# If you want to see the number of shared features
cat("Number of shared features: ", length(shared_features), "\n")
## Number of shared features: 69
As the results are largely similar, we will continue with sector-specific MINT (sPLS-DA and sPLS) from now on.
We also check the overlap between the MINT sPLS-DA models fitted on all 7 sectors and on 6 sectors (i.e., excluding Swains).
# Sanity check: does MINT sPLS-DA select similar taxa with all seven sectors
# as with six sectors (Swains excluded)? Sector is the `study` in both models.
mint.splsda.open.closed.6sectors.100 <- mint.splsda(
  X = MAGs_no_Swains,
  Y = pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$SECTOR_N_S,
  ncomp = 2,
  keepX = c(100, 10)
)
mint.splsda.open.closed.7sectors.100 <- mint.splsda(
  X = MAGs_MINT,
  Y = pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$SECTOR_N_S,
  ncomp = 2,
  keepX = c(100, 10)
)
# Names of the features selected on component 1 of each model
mint.splsda.open.closed.6sectors.100.vars <-
  selectVar(mint.splsda.open.closed.6sectors.100, comp = 1)[["name"]]
mint.splsda.open.closed.7sectors.100.vars <-
  selectVar(mint.splsda.open.closed.7sectors.100, comp = 1)[["name"]]
### Venn diagram: six-sector vs seven-sector selection
# Features selected on component 1 by both models
shared_features <- intersect(
  mint.splsda.open.closed.6sectors.100.vars,
  mint.splsda.open.closed.7sectors.100.vars
)
# filename = NULL: nothing is written to disk; the returned object is drawn
# with grid.draw() below. category.names supplies the labels shown.
venn.plot <- venn.diagram(
  x = list(
    "MINT sPLS-DA no Swains" = mint.splsda.open.closed.6sectors.100.vars,
    "MINT sPLS-DA all sectors" = mint.splsda.open.closed.7sectors.100.vars
  ),
  category.names = c("6 sectors", "7 sectors"),
  filename = NULL,
  output = TRUE
)
grid.draw(venn.plot)
# Report the size of the overlap
cat("Number of shared features: ", length(shared_features), "\n")
## Number of shared features: 84
Again, the results are largely similar, but we will present the model with all seven sectors in the final manuscript (the design is unbalanced in the Swains sector, but this is not an issue for MINT).
# Global balanced error rate (BER) of the sector-based model: one row per
# component, one column per prediction distance (see the output below).
perf.mint.splsda.open.closed.sector$global.error$BER
## max.dist centroids.dist mahalanobis.dist
## comp1 0.2872243 0.2965664 0.2965664
## comp2 0.2956015 0.2823120 0.2988910
## comp3 0.3079292 0.2399875 0.3070269
## comp4 0.3792074 0.2868296 0.3562907
## comp5 0.4125877 0.2702506 0.4138158
## comp6 0.3964693 0.2745269 0.3888158
## comp7 0.3987061 0.2755138 0.4039693
## comp8 0.4161184 0.2726629 0.4152162
## comp9 0.4212281 0.2792419 0.4278916
## comp10 0.4204605 0.2792419 0.4382675
But it may also be interesting to see these statistics within sectors - is the error higher in some sectors than in others?
# Per-sector overall classification error for the seven-sector model.
# For every study (sector) listed under `study.specific.error`, take its
# `overall` error matrix (components in rows, prediction distances in columns)
# and prepend a `Study` column holding the sector name.
# NOTE: cbind() with a character column produces a character matrix, so the
# error rates are stored as text; the rendered table relies on that layout.
study_errors_overall <- lapply(
  setNames(nm = names(perf.mint.splsda.open.closed.sector[["study.specific.error"]])),
  function(sector_id) {
    overall_err <-
      perf.mint.splsda.open.closed.sector$study.specific.error[[sector_id]]$overall
    cbind(Study = sector_id, overall_err)
  }
)
# Stack the per-sector matrices into a single table
study_error_df_overall <- do.call(rbind, study_errors_overall)
# Caption corrected: the table reports the overall error rate for all three
# prediction distances, not only the centroids distance.
knitr::kable(
  as.data.frame(study_error_df_overall),
  caption = "MINT sPLS-DA - overall error rate (max, centroids and Mahalanobis distances) across GBR sectors"
)
| Study | max.dist | centroids.dist | mahalanobis.dist | |
|---|---|---|---|---|
| comp1 | 01_Cape_Grenville | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 |
| comp2 | 01_Cape_Grenville | 0.541666666666667 | 0.375 | 0.541666666666667 |
| comp3 | 01_Cape_Grenville | 0.625 | 0.333333333333333 | 0.625 |
| comp4 | 01_Cape_Grenville | 0.541666666666667 | 0.375 | 0.458333333333333 |
| comp5 | 01_Cape_Grenville | 0.625 | 0.333333333333333 | 0.666666666666667 |
| comp6 | 01_Cape_Grenville | 0.541666666666667 | 0.333333333333333 | 0.541666666666667 |
| comp7 | 01_Cape_Grenville | 0.583333333333333 | 0.333333333333333 | 0.625 |
| comp8 | 01_Cape_Grenville | 0.583333333333333 | 0.333333333333333 | 0.583333333333333 |
| comp9 | 01_Cape_Grenville | 0.625 | 0.333333333333333 | 0.583333333333333 |
| comp10 | 01_Cape_Grenville | 0.625 | 0.333333333333333 | 0.625 |
| comp1.1 | 02_Princess_Charlotte_bay | 0.2 | 0.2 | 0.2 |
| comp2.1 | 02_Princess_Charlotte_bay | 0.6 | 0.2 | 0.6 |
| comp3.1 | 02_Princess_Charlotte_bay | 0.6 | 0.2 | 0.533333333333333 |
| comp4.1 | 02_Princess_Charlotte_bay | 0.866666666666667 | 0.2 | 0.933333333333333 |
| comp5.1 | 02_Princess_Charlotte_bay | 1 | 0.2 | 1 |
| comp6.1 | 02_Princess_Charlotte_bay | 0.933333333333333 | 0.266666666666667 | 1 |
| comp7.1 | 02_Princess_Charlotte_bay | 0.8 | 0.2 | 0.8 |
| comp8.1 | 02_Princess_Charlotte_bay | 0.8 | 0.2 | 0.733333333333333 |
| comp9.1 | 02_Princess_Charlotte_bay | 0.866666666666667 | 0.2 | 0.866666666666667 |
| comp10.1 | 02_Princess_Charlotte_bay | 0.933333333333333 | 0.2 | 0.933333333333333 |
| comp1.2 | 03_Cairns | 0.35 | 0.35 | 0.35 |
| comp2.2 | 03_Cairns | 0.05 | 0.25 | 0.05 |
| comp3.2 | 03_Cairns | 0.2 | 0.3 | 0.2 |
| comp4.2 | 03_Cairns | 0.35 | 0.3 | 0.35 |
| comp5.2 | 03_Cairns | 0.35 | 0.3 | 0.35 |
| comp6.2 | 03_Cairns | 0.3 | 0.3 | 0.3 |
| comp7.2 | 03_Cairns | 0.2 | 0.3 | 0.2 |
| comp8.2 | 03_Cairns | 0.25 | 0.3 | 0.25 |
| comp9.2 | 03_Cairns | 0.25 | 0.35 | 0.25 |
| comp10.2 | 03_Cairns | 0.15 | 0.35 | 0.2 |
| comp1.3 | 04_Innisfail | 0.0740740740740741 | 0.111111111111111 | 0.111111111111111 |
| comp2.3 | 04_Innisfail | 0.259259259259259 | 0.148148148148148 | 0.259259259259259 |
| comp3.3 | 04_Innisfail | 0.259259259259259 | 0.148148148148148 | 0.296296296296296 |
| comp4.3 | 04_Innisfail | 0.37037037037037 | 0.222222222222222 | 0.333333333333333 |
| comp5.3 | 04_Innisfail | 0.37037037037037 | 0.185185185185185 | 0.333333333333333 |
| comp6.3 | 04_Innisfail | 0.259259259259259 | 0.148148148148148 | 0.296296296296296 |
| comp7.3 | 04_Innisfail | 0.444444444444444 | 0.185185185185185 | 0.444444444444444 |
| comp8.3 | 04_Innisfail | 0.407407407407407 | 0.185185185185185 | 0.444444444444444 |
| comp9.3 | 04_Innisfail | 0.37037037037037 | 0.185185185185185 | 0.407407407407407 |
| comp10.3 | 04_Innisfail | 0.37037037037037 | 0.185185185185185 | 0.444444444444444 |
| comp1.4 | 05_Townsville | 0.303571428571429 | 0.303571428571429 | 0.303571428571429 |
| comp2.4 | 05_Townsville | 0.267857142857143 | 0.285714285714286 | 0.267857142857143 |
| comp3.4 | 05_Townsville | 0.285714285714286 | 0.232142857142857 | 0.285714285714286 |
| comp4.4 | 05_Townsville | 0.285714285714286 | 0.25 | 0.267857142857143 |
| comp5.4 | 05_Townsville | 0.285714285714286 | 0.25 | 0.285714285714286 |
| comp6.4 | 05_Townsville | 0.392857142857143 | 0.267857142857143 | 0.339285714285714 |
| comp7.4 | 05_Townsville | 0.339285714285714 | 0.267857142857143 | 0.339285714285714 |
| comp8.4 | 05_Townsville | 0.357142857142857 | 0.267857142857143 | 0.357142857142857 |
| comp9.4 | 05_Townsville | 0.357142857142857 | 0.267857142857143 | 0.357142857142857 |
| comp10.4 | 05_Townsville | 0.357142857142857 | 0.267857142857143 | 0.357142857142857 |
| comp1.5 | 06_Swains | 0.45 | 0.45 | 0.45 |
| comp2.5 | 06_Swains | 0.25 | 0.4 | 0.3 |
| comp3.5 | 06_Swains | 0.15 | 0.3 | 0.15 |
| comp4.5 | 06_Swains | 0.25 | 0.4 | 0.25 |
| comp5.5 | 06_Swains | 0.25 | 0.3 | 0.25 |
| comp6.5 | 06_Swains | 0.2 | 0.3 | 0.2 |
| comp7.5 | 06_Swains | 0.25 | 0.3 | 0.25 |
| comp8.5 | 06_Swains | 0.15 | 0.35 | 0.15 |
| comp9.5 | 06_Swains | 0.15 | 0.35 | 0.2 |
| comp10.5 | 06_Swains | 0.3 | 0.35 | 0.35 |
| comp1.6 | 07_Capricorn_Bunker | 0.285714285714286 | 0.321428571428571 | 0.321428571428571 |
| comp2.6 | 07_Capricorn_Bunker | 0.285714285714286 | 0.357142857142857 | 0.285714285714286 |
| comp3.6 | 07_Capricorn_Bunker | 0.214285714285714 | 0.285714285714286 | 0.214285714285714 |
| comp4.6 | 07_Capricorn_Bunker | 0.357142857142857 | 0.357142857142857 | 0.321428571428571 |
| comp5.6 | 07_Capricorn_Bunker | 0.428571428571429 | 0.357142857142857 | 0.464285714285714 |
| comp6.6 | 07_Capricorn_Bunker | 0.392857142857143 | 0.357142857142857 | 0.392857142857143 |
| comp7.6 | 07_Capricorn_Bunker | 0.428571428571429 | 0.357142857142857 | 0.428571428571429 |
| comp8.6 | 07_Capricorn_Bunker | 0.535714285714286 | 0.321428571428571 | 0.535714285714286 |
| comp9.6 | 07_Capricorn_Bunker | 0.535714285714286 | 0.321428571428571 | 0.571428571428571 |
| comp10.6 | 07_Capricorn_Bunker | 0.5 | 0.321428571428571 | 0.5 |
# Per-sector overall classification error for the six-sector model
# (Swains excluded).
# For every study (sector) listed under `study.specific.error`, take its
# `overall` error matrix (components in rows, prediction distances in columns)
# and prepend a `Study` column holding the sector name.
# NOTE: cbind() with a character column produces a character matrix, so the
# error rates are stored as text; the rendered table relies on that layout.
study_errors_overall_no_Swains <- lapply(
  setNames(nm = names(perf.mint.splsda.open.closed_no_Swains[["study.specific.error"]])),
  function(sector_id) {
    overall_err <-
      perf.mint.splsda.open.closed_no_Swains$study.specific.error[[sector_id]]$overall
    cbind(Study = sector_id, overall_err)
  }
)
# Stack the per-sector matrices into a single table
study_error_df_overall_no_Swains <- do.call(rbind, study_errors_overall_no_Swains)
# Caption corrected: the table reports the overall error rate for all three
# prediction distances, not only the centroids distance.
knitr::kable(
  as.data.frame(study_error_df_overall_no_Swains),
  caption = "MINT sPLS-DA - overall error rate (max, centroids and Mahalanobis distances) across six GBR sectors (Swains excluded)"
)
| Study | max.dist | centroids.dist | mahalanobis.dist | |
|---|---|---|---|---|
| comp1 | 01_Cape_Grenville | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 |
| comp2 | 01_Cape_Grenville | 0.5 | 0.416666666666667 | 0.5 |
| comp3 | 01_Cape_Grenville | 0.583333333333333 | 0.375 | 0.5 |
| comp4 | 01_Cape_Grenville | 0.458333333333333 | 0.458333333333333 | 0.458333333333333 |
| comp5 | 01_Cape_Grenville | 0.625 | 0.375 | 0.583333333333333 |
| comp6 | 01_Cape_Grenville | 0.5 | 0.375 | 0.541666666666667 |
| comp7 | 01_Cape_Grenville | 0.541666666666667 | 0.375 | 0.583333333333333 |
| comp8 | 01_Cape_Grenville | 0.625 | 0.416666666666667 | 0.625 |
| comp9 | 01_Cape_Grenville | 0.583333333333333 | 0.416666666666667 | 0.625 |
| comp10 | 01_Cape_Grenville | 0.625 | 0.375 | 0.625 |
| comp1.1 | 02_Princess_Charlotte_bay | 0.2 | 0.2 | 0.2 |
| comp2.1 | 02_Princess_Charlotte_bay | 0.666666666666667 | 0.2 | 0.666666666666667 |
| comp3.1 | 02_Princess_Charlotte_bay | 0.533333333333333 | 0.2 | 0.533333333333333 |
| comp4.1 | 02_Princess_Charlotte_bay | 0.933333333333333 | 0.2 | 0.933333333333333 |
| comp5.1 | 02_Princess_Charlotte_bay | 1 | 0.2 | 1 |
| comp6.1 | 02_Princess_Charlotte_bay | 1 | 0.2 | 1 |
| comp7.1 | 02_Princess_Charlotte_bay | 1 | 0.266666666666667 | 1 |
| comp8.1 | 02_Princess_Charlotte_bay | 1 | 0.2 | 0.933333333333333 |
| comp9.1 | 02_Princess_Charlotte_bay | 1 | 0.2 | 0.866666666666667 |
| comp10.1 | 02_Princess_Charlotte_bay | 1 | 0.2 | 1 |
| comp1.2 | 03_Cairns | 0.35 | 0.35 | 0.35 |
| comp2.2 | 03_Cairns | 0.05 | 0.35 | 0.05 |
| comp3.2 | 03_Cairns | 0.25 | 0.3 | 0.25 |
| comp4.2 | 03_Cairns | 0.25 | 0.3 | 0.3 |
| comp5.2 | 03_Cairns | 0.35 | 0.3 | 0.35 |
| comp6.2 | 03_Cairns | 0.3 | 0.3 | 0.3 |
| comp7.2 | 03_Cairns | 0.4 | 0.3 | 0.35 |
| comp8.2 | 03_Cairns | 0.35 | 0.3 | 0.4 |
| comp9.2 | 03_Cairns | 0.4 | 0.35 | 0.45 |
| comp10.2 | 03_Cairns | 0.35 | 0.35 | 0.4 |
| comp1.3 | 04_Innisfail | 0.111111111111111 | 0.111111111111111 | 0.111111111111111 |
| comp2.3 | 04_Innisfail | 0.259259259259259 | 0.148148148148148 | 0.259259259259259 |
| comp3.3 | 04_Innisfail | 0.296296296296296 | 0.185185185185185 | 0.296296296296296 |
| comp4.3 | 04_Innisfail | 0.407407407407407 | 0.185185185185185 | 0.407407407407407 |
| comp5.3 | 04_Innisfail | 0.37037037037037 | 0.185185185185185 | 0.37037037037037 |
| comp6.3 | 04_Innisfail | 0.296296296296296 | 0.185185185185185 | 0.296296296296296 |
| comp7.3 | 04_Innisfail | 0.407407407407407 | 0.185185185185185 | 0.407407407407407 |
| comp8.3 | 04_Innisfail | 0.407407407407407 | 0.185185185185185 | 0.407407407407407 |
| comp9.3 | 04_Innisfail | 0.444444444444444 | 0.185185185185185 | 0.444444444444444 |
| comp10.3 | 04_Innisfail | 0.37037037037037 | 0.185185185185185 | 0.37037037037037 |
| comp1.4 | 05_Townsville | 0.303571428571429 | 0.303571428571429 | 0.303571428571429 |
| comp2.4 | 05_Townsville | 0.285714285714286 | 0.321428571428571 | 0.303571428571429 |
| comp3.4 | 05_Townsville | 0.285714285714286 | 0.267857142857143 | 0.267857142857143 |
| comp4.4 | 05_Townsville | 0.303571428571429 | 0.267857142857143 | 0.321428571428571 |
| comp5.4 | 05_Townsville | 0.285714285714286 | 0.285714285714286 | 0.303571428571429 |
| comp6.4 | 05_Townsville | 0.375 | 0.285714285714286 | 0.392857142857143 |
| comp7.4 | 05_Townsville | 0.375 | 0.303571428571429 | 0.410714285714286 |
| comp8.4 | 05_Townsville | 0.339285714285714 | 0.267857142857143 | 0.339285714285714 |
| comp9.4 | 05_Townsville | 0.339285714285714 | 0.267857142857143 | 0.339285714285714 |
| comp10.4 | 05_Townsville | 0.339285714285714 | 0.267857142857143 | 0.357142857142857 |
| comp1.5 | 07_Capricorn_Bunker | 0.321428571428571 | 0.321428571428571 | 0.321428571428571 |
| comp2.5 | 07_Capricorn_Bunker | 0.392857142857143 | 0.321428571428571 | 0.392857142857143 |
| comp3.5 | 07_Capricorn_Bunker | 0.285714285714286 | 0.321428571428571 | 0.285714285714286 |
| comp4.5 | 07_Capricorn_Bunker | 0.392857142857143 | 0.321428571428571 | 0.392857142857143 |
| comp5.5 | 07_Capricorn_Bunker | 0.428571428571429 | 0.321428571428571 | 0.428571428571429 |
| comp6.5 | 07_Capricorn_Bunker | 0.285714285714286 | 0.321428571428571 | 0.285714285714286 |
| comp7.5 | 07_Capricorn_Bunker | 0.357142857142857 | 0.321428571428571 | 0.357142857142857 |
| comp8.5 | 07_Capricorn_Bunker | 0.321428571428571 | 0.321428571428571 | 0.321428571428571 |
| comp9.5 | 07_Capricorn_Bunker | 0.392857142857143 | 0.321428571428571 | 0.392857142857143 |
| comp10.5 | 07_Capricorn_Bunker | 0.321428571428571 | 0.321428571428571 | 0.321428571428571 |
# Per-sector, per-class error (centroids distance) for the seven-sector model.
# For every study (sector), take the `error.rate.class$centroids.dist` matrix
# (classes C and O in rows, components in columns) and prepend a `Study`
# column holding the sector name.
study_errors <- lapply(
  setNames(nm = names(perf.mint.splsda.open.closed.sector[["study.specific.error"]])),
  function(sector_id) {
    class_err <-
      perf.mint.splsda.open.closed.sector$study.specific.error[[sector_id]]$error.rate.class$centroids.dist
    cbind(Study = sector_id, class_err)
  }
)
# Stack the per-sector matrices into a single table
study_error_df <- do.call(rbind, study_errors)
# Render the table
knitr::kable(
  as.data.frame(study_error_df),
  caption = "MINT sPLS-DA - error rate (centroids distance) across GBR sectors, and separately for C and O"
)
| Study | comp1 | comp2 | comp3 | comp4 | comp5 | comp6 | comp7 | comp8 | comp9 | comp10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C | 01_Cape_Grenville | 0.583333333333333 | 0.583333333333333 | 0.5 | 0.583333333333333 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 |
| O | 01_Cape_Grenville | 0.25 | 0.166666666666667 | 0.166666666666667 | 0.166666666666667 | 0.166666666666667 | 0.166666666666667 | 0.166666666666667 | 0.166666666666667 | 0.166666666666667 | 0.166666666666667 |
| C.1 | 02_Princess_Charlotte_bay | 0 | 0 | 0 | 0 | 0 | 0.125 | 0 | 0 | 0 | 0 |
| O.1 | 02_Princess_Charlotte_bay | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 |
| C.2 | 03_Cairns | 0.25 | 0.125 | 0.125 | 0.125 | 0.125 | 0.125 | 0.125 | 0.125 | 0.25 | 0.25 |
| O.2 | 03_Cairns | 0.416666666666667 | 0.333333333333333 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 |
| C.3 | 04_Innisfail | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.2 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 |
| O.3 | 04_Innisfail | 0.0833333333333333 | 0.166666666666667 | 0.166666666666667 | 0.25 | 0.25 | 0.166666666666667 | 0.25 | 0.25 | 0.25 | 0.25 |
| C.4 | 05_Townsville | 0.392857142857143 | 0.392857142857143 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 |
| O.4 | 05_Townsville | 0.214285714285714 | 0.178571428571429 | 0.0357142857142857 | 0.0714285714285714 | 0.0714285714285714 | 0.107142857142857 | 0.107142857142857 | 0.107142857142857 | 0.107142857142857 | 0.107142857142857 |
| C.5 | 06_Swains | 0.25 | 0.25 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 |
| O.5 | 06_Swains | 0.5 | 0.4375 | 0.375 | 0.4375 | 0.3125 | 0.3125 | 0.3125 | 0.375 | 0.375 | 0.375 |
| C.6 | 07_Capricorn_Bunker | 0.3125 | 0.375 | 0.25 | 0.375 | 0.375 | 0.375 | 0.375 | 0.375 | 0.375 | 0.375 |
| O.6 | 07_Capricorn_Bunker | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.25 | 0.25 | 0.25 |
# Per-sector, per-class error (centroids distance) for the six-sector model
# (Swains excluded).
# For every study (sector), take the `error.rate.class$centroids.dist` matrix
# (classes C and O in rows, components in columns) and prepend a `Study`
# column holding the sector name.
study_errors_no_Swains <- lapply(
  setNames(nm = names(perf.mint.splsda.open.closed_no_Swains[["study.specific.error"]])),
  function(sector_id) {
    class_err <-
      perf.mint.splsda.open.closed_no_Swains$study.specific.error[[sector_id]]$error.rate.class$centroids.dist
    cbind(Study = sector_id, class_err)
  }
)
# Stack the per-sector matrices into a single table
study_error_df_no_Swains <- do.call(rbind, study_errors_no_Swains)
# Render the table
knitr::kable(
  as.data.frame(study_error_df_no_Swains),
  caption = "MINT sPLS-DA - error rate (centroids distance) across GBR sectors (Swains excluded), and separately for C and O"
)
| Study | comp1 | comp2 | comp3 | comp4 | comp5 | comp6 | comp7 | comp8 | comp9 | comp10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C | 01_Cape_Grenville | 0.583333333333333 | 0.583333333333333 | 0.5 | 0.583333333333333 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 |
| O | 01_Cape_Grenville | 0.25 | 0.25 | 0.25 | 0.333333333333333 | 0.25 | 0.25 | 0.25 | 0.333333333333333 | 0.333333333333333 | 0.25 |
| C.1 | 02_Princess_Charlotte_bay | 0 | 0 | 0 | 0 | 0 | 0 | 0.125 | 0 | 0 | 0 |
| O.1 | 02_Princess_Charlotte_bay | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 | 0.428571428571429 |
| C.2 | 03_Cairns | 0.25 | 0.25 | 0.125 | 0.125 | 0.125 | 0.125 | 0.125 | 0.125 | 0.25 | 0.25 |
| O.2 | 03_Cairns | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 | 0.416666666666667 |
| C.3 | 04_Innisfail | 0.0666666666666667 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 | 0.133333333333333 |
| O.3 | 04_Innisfail | 0.166666666666667 | 0.166666666666667 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 |
| C.4 | 05_Townsville | 0.392857142857143 | 0.357142857142857 | 0.392857142857143 | 0.392857142857143 | 0.392857142857143 | 0.392857142857143 | 0.392857142857143 | 0.392857142857143 | 0.392857142857143 | 0.392857142857143 |
| O.4 | 05_Townsville | 0.214285714285714 | 0.285714285714286 | 0.142857142857143 | 0.142857142857143 | 0.178571428571429 | 0.178571428571429 | 0.214285714285714 | 0.142857142857143 | 0.142857142857143 | 0.142857142857143 |
| C.5 | 07_Capricorn_Bunker | 0.3125 | 0.3125 | 0.3125 | 0.3125 | 0.3125 | 0.3125 | 0.3125 | 0.3125 | 0.3125 | 0.3125 |
| O.5 | 07_Capricorn_Bunker | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 | 0.333333333333333 |
We can choose the keepX parameter using the tune() function for a MINT object. The function performs LOGOCV for different values of test.keepX provided on each component, and no repeat argument is needed. Based on the mean classification error rate (overall error rate or BER) and a centroids distance, we output the optimal number of variables keepX to be included in the final model. This step was skipped and we retained 50 MAGs per each of the three dimensions.
# keepX tuning for the seven-sector MINT sPLS-DA (kept for reference).
# Candidate keepX values 10, 20, ..., 500 are tested on up to 5 components;
# performance is measured with the BER and the centroids distance.
tune.mint.splsda.open.closed.sector <- tune(
  method = "mint.splsda", # model type to tune
  X = MAGs_MINT,
  Y = as.factor(pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$Open_or_Closed_to_fishing),
  study = as.factor(pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$SECTOR_N_S),
  ncomp = 5,
  test.keepX = seq(from = 10, to = 500, by = 10),
  measure = "BER",
  dist = "centroids.dist"
)
# keepX tuning for the six-sector MINT sPLS-DA, Swains excluded (kept for
# reference). Candidate keepX values 10, 20, ..., 500 are tested on up to
# 5 components; performance is measured with the BER and the centroids
# distance.
tune.mint.splsda.open.closed.sector_no_Swains <- tune(
  method = "mint.splsda", # model type to tune
  X = MAGs_no_Swains,
  Y = as.factor(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing),
  study = as.factor(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$SECTOR_N_S),
  ncomp = 5,
  test.keepX = seq(from = 10, to = 500, by = 10),
  measure = "BER",
  dist = "centroids.dist"
)
# Tuning plot for the seven-sector model (sd = FALSE is handed to the plot
# method; presumably it switches off standard-deviation bars - confirm).
plot(tune.mint.splsda.open.closed.sector, sd = FALSE)
Tuning plot of the MINT sPLS-DA models with up to 5 components. Diamonds represent the optimal number of features on a given component. Balanced error rate found on the vertical axis and is the metric to be minimised.
### Optimal number of components
tune.mint.splsda.open.closed.sector$choice.ncomp
## $ncomp
## [1] 1
##
## $values
## comp1 comp2 comp3 comp4 comp5
## 01_Cape_Grenville 0.4166667 0.3750000 0.3750000 0.3750000 0.3750000
## 02_Princess_Charlotte_bay 0.2142857 0.2142857 0.2142857 0.2142857 0.2142857
## 03_Cairns 0.3333333 0.2708333 0.2708333 0.2708333 0.2708333
## 04_Innisfail 0.1500000 0.1500000 0.1833333 0.1833333 0.1833333
## 05_Townsville 0.2857143 0.2500000 0.2500000 0.2500000 0.2500000
## 06_Swains 0.3750000 0.3125000 0.3437500 0.3750000 0.3437500
## 07_Capricorn_Bunker 0.3229167 0.3541667 0.3541667 0.3541667 0.3541667
### Optimal number of features per component
tune.mint.spls_da.optimal.keepX.sector <- tune.mint.splsda.open.closed.sector$choice.keepX # extract optimal values
tune.mint.spls_da.optimal.keepX.sector
## comp1 comp2 comp3 comp4 comp5
## 350 180 10 10 20
# Tuning plot for the six-sector model, Swains excluded (sd = FALSE is handed
# to the plot method; presumably it switches off standard-deviation bars -
# confirm).
plot(tune.mint.splsda.open.closed.sector_no_Swains, sd = FALSE)
Tuning plot of the MINT sPLS-DA models with up to 5 components. Diamonds represent the optimal number of features on a given component. Balanced error rate found on the vertical axis and is the metric to be minimised.
### Optimal number of components
# (not run) tune.mint.splsda.open.closed.sector_no_Swains$choice.ncomp
# NOTE(review): the disabled line previously named
# `tune.mint.splsda.open.closed`, which is not the object tuned for this
# model - confirm before re-enabling.
### Optimal number of features per component
tune.mint.spls_da.optimal.keepX.sector_no_Swains <- tune.mint.splsda.open.closed.sector_no_Swains$choice.keepX # extract optimal values
tune.mint.spls_da.optimal.keepX.sector_no_Swains
## comp1 comp2 comp3 comp4 comp5
## 420 270 190 10 250
# Per-class error rates (classes C and O) at the selected keepX of each
# component, seven-sector tuning.
tune.mint.splsda.open.closed.sector[["error.rate.class"]]
## $comp1
## test.keepX.350
## C 0.3076923
## O 0.3030303
##
## $comp2
## test.keepX.180
## C 0.3076923
## O 0.2525253
##
## $comp3
## test.keepX.10
## C 0.3186813
## O 0.2626263
##
## $comp4
## test.keepX.10
## C 0.3186813
## O 0.2727273
##
## $comp5
## test.keepX.20
## C 0.3186813
## O 0.2626263
# Per-class error rates (classes C and O) at the selected keepX of each
# component, six-sector tuning (Swains excluded).
tune.mint.splsda.open.closed.sector_no_Swains[["error.rate.class"]]
## $comp1
## test.keepX.420
## C 0.2988506
## O 0.2771084
##
## $comp2
## test.keepX.270
## C 0.2873563
## O 0.2891566
##
## $comp3
## test.keepX.190
## C 0.2988506
## O 0.2650602
##
## $comp4
## test.keepX.10
## C 0.2988506
## O 0.2650602
##
## $comp5
## test.keepX.250
## C 0.2758621
## O 0.2771084
# Final seven-sector MINT sPLS-DA model with two components (chosen from the
# BER). The tuned keepX vector holds five values; because ncomp = 2,
# mixOmics trims it to its first two entries and prints a message saying so.
final.mint.splsda.open.closed.sector <- mint.splsda(
  X = MAGs_MINT,
  Y = pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$SECTOR_N_S,
  ncomp = 2, # based on the BER
  keepX = tune.mint.spls_da.optimal.keepX.sector
  # keepX = c(50, 50, 50) # alternative: arbitrary number of features
)
## keepX is of length 5 while ncomp is 2
## trimming keepX to c(350,180)
# Final six-sector MINT sPLS-DA model (Swains excluded) with two components
# (chosen from the BER). The tuned keepX vector holds five values; because
# ncomp = 2, mixOmics trims it to its first two entries and prints a message
# saying so.
final.mint.splsda.open.closed.sector_no_Swains <- mint.splsda(
  X = MAGs_no_Swains,
  Y = pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$SECTOR_N_S,
  ncomp = 2, # based on the BER
  keepX = tune.mint.spls_da.optimal.keepX.sector_no_Swains
  # keepX = c(50, 50, 50) # alternative: arbitrary number of features
)
## keepX is of length 5 while ncomp is 2
## trimming keepX to c(420,270)
# Global sample plot of the seven-sector model on components 1 and 2, with
# one colour per protection class and ellipses drawn.
# Changes: `T` spelled out as TRUE (T is an ordinary, reassignable variable)
# and the trailing comma before `)` removed, since it passed an empty
# argument to plotIndiv().
sample.plot.mint.splsda.open.closed.global <- plotIndiv(
  final.mint.splsda.open.closed.sector,
  study = "global",
  comp = c(1, 2), # We can look into comp 3 too, but I think 2 dimensions are enough
  legend = TRUE,
  col.per.group = c(
    "seagreen3", # Closed to fishing
    "steelblue4" # open to fishing
  ),
  # title = 'MINT sPLS-DA using IMOS GBR-MGD MAGs | PCs 1-2',
  subtitle = 'Reefs that are open or closed to fishing',
  ellipse = TRUE
)
Sample plot from the MINT sPLS-DA performed on the IMOS-MGD seawater MAGs to identify microbes that discriminate between reefs that are open or closed to fishing. Samples (reef sites) are projected into the MINT sPLS-DA space spanned by the first two components, and coloured by their corresponding category reflecting reef protection level. Symbols indicate the membership to an LTMP trip/transect. Component 1 discriminates between the two reef categories.
# Global sample plot of the six-sector model (Swains excluded) on components
# 1 and 2, with one colour per protection class and ellipses drawn.
# Change: `T` spelled out as TRUE (T is an ordinary, reassignable variable).
# NOTE(review): title and subtitle carry the same text - confirm whether the
# subtitle was meant to differ.
sample.plot.mint.splsda.open.closed.global_no_Swains <- plotIndiv(
  final.mint.splsda.open.closed.sector_no_Swains,
  study = "global",
  comp = c(1, 2), # We can look into comp 3 too, but I think 2 dimensions are enough
  legend = TRUE,
  col.per.group = c(
    "seagreen3", # Closed to fishing
    "steelblue4" # open to fishing
  ),
  title = 'MINT sPLS-DA | Training dataset on 6 sectors',
  subtitle = 'MINT sPLS-DA | Training dataset on 6 sectors',
  ellipse = TRUE
  # pch = pch.vec
)
# Study-specific ("all.partial") sample plots of the seven-sector model, with
# one colour per protection class and no ellipses.
# Change: `F` spelled out as FALSE (F is an ordinary, reassignable variable).
sample.plot.mint.splsda.open.closed.partial.sector <- plotIndiv(
  final.mint.splsda.open.closed.sector,
  legend = TRUE,
  study = "all.partial",
  col.per.group = c(
    "seagreen3", # Closed to fishing
    "steelblue4" # open to fishing
  ),
  # title = 'MINT sPLS-DA using IMOS GBR-MGD MAGs: Open or Closed to fishing',
  ellipse = FALSE
)
# Study-specific ("all.partial") sample plots of the six-sector model (Swains
# excluded), with one colour per protection class and no ellipses.
# Change: `F` spelled out as FALSE (F is an ordinary, reassignable variable).
sample.plot.mint.splsda.open.closed.partial.sector_no_Swains <- plotIndiv(
  final.mint.splsda.open.closed.sector_no_Swains,
  legend = TRUE,
  study = "all.partial",
  col.per.group = c(
    "seagreen3", # Closed to fishing
    "steelblue4" # open to fishing
  ),
  # title = 'MINT sPLS-DA using IMOS GBR-MGD MAGs: Open or Closed to fishing',
  ellipse = FALSE
)
Lastly, what about those spatiotemporal batch effects we observed in (s)PLS-DA?
# Global sample plot of the seven-sector model, this time grouped and
# coloured by sampling trip instead of protection class, to check whether
# samples still cluster by trip.
# Change: `T` spelled out as TRUE (T is an ordinary, reassignable variable).
plotIndiv(
  final.mint.splsda.open.closed.sector,
  study = "global",
  comp = c(1, 2), # We can look into comp 3 too, but I think 2 dimensions are enough
  legend = TRUE,
  group = sample_data(pMAGs_95ANI_phyloseq_CLR_per_sector)$Sampling_trip,
  col.per.group = c(
    "indianred", # Sampling trip 1
    "indianred4", # Sampling trip 2
    "red3", # Sampling trip 3
    "slateblue" # Sampling trip 4
  ),
  title = 'MINT sPLS-DA using IMOS GBR-MGD MAGs | PCs 1-2',
  subtitle = 'AIMS-LTMP trips',
  ellipse = TRUE
)
This plot shows that there is no longer clustering between the trips, suggesting we accounted for spatiotemporal batch effects.
We can examine our molecular signature selected with MINT sPLS-DA. The correlation circle plot highlights the contribution of each selected MAG to each component (close to the large circle), and their correlation (clusters of variables).
# Correlation circle plot for the seven-sector model. Shortened MAG labels
# are supplied through var.names to reduce visual clutter, and the
# correlation cutoff is 0.7.
# Change: the trailing comma before `)` is removed, since it passed an empty
# argument to plotVar().
plotVar(
  final.mint.splsda.open.closed.sector,
  var.names = list(MAGs.name.super.short),
  cutoff = 0.7
)
Correlation circle plot representing the MAGs selected by MINT sPLS-DA to discriminate between reefs that are open or closed to fishing, across the four sampling transects. We only show the MAGs selected by MINT sPLS-DA on components 1 and 2, and with a correlation cutoff of 0.7 (the value set in the plotVar() call). This graphic should be interpreted in conjunction with the sample plot above - i.e. we observe a subset of seawater MAGs that are strongly correlated and positively associated with component 1 (positive values on the x-axis, on the right), which are likely to characterise the groups of reef sites that are open to fishing. We also observe a subset of seawater MAGs that are negatively associated with component 1 (x-axis, left) that may characterise reef sites that are protected/closed to fishing (and are negatively correlated with the previous group of MAGs).
# Correlation circle plot for the six-sector model (Swains excluded).
# Shortened MAG labels are supplied through var.names to reduce visual
# clutter, and the correlation cutoff is 0.7.
# Change: the trailing comma before `)` is removed, since it passed an empty
# argument to plotVar().
plotVar(
  final.mint.splsda.open.closed.sector_no_Swains,
  var.names = list(MAGs.name.super.short),
  cutoff = 0.7
)
Correlation circle plot representing the MAGs selected by MINT sPLS-DA to discriminate between reefs that are open or closed to fishing, across the four sampling transects. We only show the MAGs selected by MINT sPLS-DA on components 1 and 2, and with a correlation cutoff of 0.7 (the value set in the plotVar() call). This graphic should be interpreted in conjunction with the sample plot above - i.e. we observe a subset of seawater MAGs that are strongly correlated and positively associated with component 1 (positive values on the x-axis, on the right), which are likely to characterise the groups of reef sites that are open to fishing. We also observe a subset of seawater MAGs that are negatively associated with component 1 (x-axis, left) that may characterise reef sites that are protected/closed to fishing (and are negatively correlated with the previous group of MAGs).
The plotLoadings() function displays the coefficient weight of each selected MAG in each IMOS GBR-MGD transect and shows the agreement of the molecular signatures across LTMP trips. Colours indicate the reef class (open or closed to fishing) in which the mean expression value of each selected MAG is maximal. These plots should be interpreted jointly with MINT sPLS-DA sample plots.
# Per-study ("all.partial") loading plots for component 1 of the seven-sector
# model: up to 60 variables are displayed, labelled with MAGs.name.short,
# using contrib = "max" and method = "mean".
# Change: the trailing comma left in front of `)` (after the disabled
# `subtitle` line) is removed, since it passed an empty argument to
# plotLoadings().
loading.plots.open.closed.per.sector <- plotLoadings(
  final.mint.splsda.open.closed.sector,
  contrib = "max",
  method = 'mean',
  comp = 1,
  legend = TRUE,
  legend.color = c(
    "seagreen3", # Closed
    "steelblue4" # Open
  ),
  ndisplay = 60,
  name.var = MAGs.name.short,
  study = "all.partial",
  title = "Contribution on comp 1"
  # subtitle = paste("Study",1:4)
)
loading.plots.open.closed.per.sector
# Per-study ("all.partial") loading plots for component 1 of the six-sector
# model (Swains excluded): up to 60 variables are displayed, labelled with
# MAGs.name.short, using contrib = "max" and method = "mean".
# Change: the trailing comma left in front of `)` (after the disabled
# `subtitle` line) is removed, since it passed an empty argument to
# plotLoadings().
loading.plots.open.closed.per.sector_no_Swains <- plotLoadings(
  final.mint.splsda.open.closed.sector_no_Swains,
  contrib = "max",
  method = 'mean',
  comp = 1,
  legend = TRUE,
  legend.color = c(
    "seagreen3", # Closed
    "steelblue4" # Open
  ),
  ndisplay = 60,
  name.var = MAGs.name.short,
  study = "all.partial",
  title = "Contribution on comp 1"
  # subtitle = paste("Study",1:4)
)
# NOTE(review): this prints the fitted model, whereas the seven-sector chunk
# prints its plotLoadings() result - confirm whether
# loading.plots.open.closed.per.sector_no_Swains was intended here.
final.mint.splsda.open.closed.sector_no_Swains
We can also look at the averaged contribution across MINT studies:
# Loading plot for component 1 of the seven-sector model without a `study`
# argument (i.e. not split per study): up to 60 variables are displayed,
# labelled with MAGs.name.short, using contrib = "max" and method = "mean".
# Change: the trailing comma left in front of `)` (after the disabled
# `subtitle` line) is removed, since it passed an empty argument to
# plotLoadings().
loading.plots.open.closed.across.sectors <- plotLoadings(
  final.mint.splsda.open.closed.sector,
  contrib = "max",
  method = 'mean',
  comp = 1,
  legend = TRUE,
  legend.color = c(
    "seagreen3", # Closed
    "steelblue4" # Open
  ),
  ndisplay = 60,
  name.var = MAGs.name.short,
  # study="all.partial",
  title = "Contribution on comp 1"
  # subtitle = paste("Study",1:4)
)
loading.plots.open.closed.across.sectors
The Clustered Image Map represents the enrichment levels of the MAG signature per sample. Here we use the default Euclidean distance and Complete linkage for the first component.
# One colour per sample according to protection class (first factor level ->
# "seagreen3", second -> "steelblue4").
cols.open.closed <- c("seagreen3", "steelblue4")
# palette(value) returns the palette that was active BEFORE the call, so
# `palette(cols)[i]` only gave these colours when the statement was run twice
# (which is why the lines were duplicated). Index the colour vector directly.
open.closed.cols <- cols.open.closed[
  as.numeric(as.factor(pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$Open_or_Closed_to_fishing))
]
# Kept for compatibility: the original code also switched the session palette
# to these two colours as a side effect.
palette(cols.open.closed)
# Short display names for the MAGs: the taxonomy ranks Class..Species pasted
# together with "; ". The OTU table is the left-hand side of the join, so the
# resulting labels follow its row order.
MAGs.name.short <- left_join(
  otu_table(pMAGs_95ANI_phyloseq_CLR_per_sector) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  tax_table(pMAGs_95ANI_phyloseq_CLR_per_sector) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  by = "OTU" # explicit key instead of relying on the shared-column default
) %>%
  tidyr::unite(taxonomy, c(Class, Order, Family, Genus, Species), sep = "; ") # Adding Taxonomy info
MAGs.name.short <- as.character(MAGs.name.short$taxonomy)
# Clustered image map for component 1 of the all-sector model, with the short
# taxonomy labels as column names and row-side colours by protection level.
# NOTE(review): the legend colours come from color.mixo() while the row-side
# colours come from open.closed.cols - confirm the legend matches the rows.
cim(
  final.mint.splsda.open.closed.sector,
  comp = 1,
  # cutoff = 0.95,
  title = "MINT sPLS-DA, component 1",
  xlab = "Indicator MAGs that discriminate between take and no take zones",
  ylab = "Reef sites",
  col.names = MAGs.name.short,
  row.names = TRUE,
  # row.sideColors = color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))),
  row.sideColors = open.closed.cols,
  legend = list(
    color.mixo(as.numeric(as.factor(
      pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$Open_or_Closed_to_fishing
    ))),
    title = "Reef protection level"
  ),
  margins = c(37,  # bottom
              25), # right
  keysize = c(1, 0.4)
)
Clustered Image Map of the MAGs selected by MINT sPLS-DA for component 1 only, to discriminate between reefs that are open or closed to fishing. A hierarchical clustering based on the MAG enrichment levels of the indicator MAGs on component 1, with reef sites in rows coloured according to protection level.
# Row-side colours for the no-Swains clustered image map: one colour per
# sample, taken from cols.open.closed by the integer code of
# Open_or_Closed_to_fishing (first level -> seagreen3, second -> steelblue4).
#
# The colour vector is indexed directly; palette(value) returns the palette
# that was active *before* the call, which is why the previous version only
# gave the intended colours when the statement was run twice.
cols.open.closed <- c("seagreen3", "steelblue4")
# Kept only for its side effect, so the session palette ends up in the same
# state as before this fix.
palette(cols.open.closed)
open.closed.cols <- cols.open.closed[
  as.integer(as.factor(
    pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing
  ))
]
# Short display names for the MAGs of the no-Swains object: the taxonomy ranks
# Class..Species pasted together with "; ". The OTU table is the left-hand
# side of the join, so the resulting labels follow its row order.
MAGs.name.short <- left_join(
  otu_table(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  tax_table(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  by = "OTU" # explicit key instead of relying on the shared-column default
) %>%
  tidyr::unite(taxonomy, c(Class, Order, Family, Genus, Species), sep = "; ") # Adding Taxonomy info
MAGs.name.short <- as.character(MAGs.name.short$taxonomy)
# Clustered image map for component 1 of the model fitted without Swains,
# with the short taxonomy labels as column names and row-side colours by
# protection level.
# NOTE(review): the legend colours come from color.mixo() while the row-side
# colours come from open.closed.cols - confirm the legend matches the rows.
cim(
  final.mint.splsda.open.closed.sector_no_Swains,
  comp = 1,
  # cutoff = 0.95,
  title = "MINT sPLS-DA, component 1",
  xlab = "Indicator MAGs that discriminate between take and no take zones",
  ylab = "Reef sites",
  col.names = MAGs.name.short,
  row.names = TRUE,
  # row.sideColors = color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))),
  row.sideColors = open.closed.cols,
  legend = list(
    color.mixo(as.numeric(as.factor(
      pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing
    ))),
    title = "Reef protection level"
  ),
  margins = c(37,  # bottom
              25), # right
  keysize = c(1, 0.4)
)
Clustered Image Map of the MAGs selected by MINT sPLS-DA for component 1 only, to discriminate between reefs that are open or closed to fishing. A hierarchical clustering based on the MAG enrichment levels of the indicator MAGs on component 1, with reef sites in rows coloured according to protection level.
# Exporting for RawGraphs
# Row-side colours: one colour per sample, taken from cols.open.closed by the
# integer code of Open_or_Closed_to_fishing (first level -> seagreen3,
# second -> steelblue4).
#
# The colour vector is indexed directly; palette(value) returns the palette
# that was active *before* the call, which is why the previous version only
# gave the intended colours when the statement was run twice.
cols.open.closed <- c("seagreen3", "steelblue4")
# Kept only for its side effect, so the session palette ends up in the same
# state as before this fix.
palette(cols.open.closed)
open.closed.cols <- cols.open.closed[
  as.integer(as.factor(
    pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$Open_or_Closed_to_fishing
  ))
]
# Group assignments for the MAGs selected on component 1 (160 according to
# the original note): plotLoadings() is called without `ndisplay`, and its
# return value is reshaped into a table of "*GroupContrib" columns.
# NOTE(review): MAGs.name.short holds whatever was assigned last in the
# session - confirm it corresponds to the all-sector object used here.
loading.plots.open.closed.all.MAGs.sectors <- plotLoadings(
  final.mint.splsda.open.closed.sector,
  comp = 1,
  study = "all.partial",
  contrib = "max",
  method = "mean",
  # ndisplay = 30,
  name.var = MAGs.name.short,
  legend = TRUE,
  legend.color = c("seagreen3",   # Closed
                   "steelblue4"), # Open
  title = "Contribution on comp 1"
  # subtitle = paste("Study", 1:4)
)
# Convert to a data frame, keep only the group-contribution columns and move
# the row names into an explicit OTU column.
loading.plots.rawgraphs.sectors <- loading.plots.open.closed.all.MAGs.sectors %>%
  as.data.frame() %>%
  dplyr::select(ends_with("GroupContrib")) %>%
  rownames_to_column("OTU")
# Full display names: MAG ID followed by the complete taxonomy
# (Domain..Species), pasted together with "; ". The OTU table is the
# left-hand side of the join, so the labels follow its row order.
MAGs.name.full <- left_join(
  otu_table(pMAGs_95ANI_phyloseq_CLR_per_sector) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  tax_table(pMAGs_95ANI_phyloseq_CLR_per_sector) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  by = "OTU" # explicit key instead of relying on the shared-column default
) %>%
  tidyr::unite(taxonomy, c(OTU, Domain, Phylum, Class, Order, Family, Genus, Species), sep = "; ") # Adding Taxonomy info
MAGs.name.full <- as.character(MAGs.name.full$taxonomy)
# Same clustered image map as above, but transposed, labelled with the full
# MAG names, and stored so that its `col.names` element can be reused below.
# NOTE(review): the legend colours come from color.mixo() while the row-side
# colours come from open.closed.cols - confirm the legend matches the rows.
mint.spls.da.rawgraphs.sectors <- cim(
  final.mint.splsda.open.closed.sector,
  comp = 1,
  # cutoff = 0.95,
  transpose = TRUE,
  title = "MINT sPLS-DA, component 1",
  xlab = "Indicator MAGs that discriminate between take and no take zones",
  ylab = "Reef sites",
  col.names = MAGs.name.full,
  row.names = TRUE,
  # row.sideColors = color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))),
  row.sideColors = open.closed.cols,
  legend = list(
    color.mixo(as.numeric(as.factor(
      pMAGs_95ANI_phyloseq_CLR_per_sector@sam_data$Open_or_Closed_to_fishing
    ))),
    title = "Reef protection level"
  ),
  margins = c(37,  # bottom
              25), # right
  keysize = c(1, 0.4)
)
# Build the RawGraphs table: take the column labels stored in the cim result,
# split each "ID; Domain; ...; Species" label back into separate columns, and
# attach the per-sector group contributions (joined on the shared OTU column).
# The piped vector becomes a one-column data frame; `col = .` refers to that
# column (presumably named "." after the pipe).
rawgraphs.order.sectors <- mint.spls.da.rawgraphs.sectors$col.names %>%
  as.data.frame() %>%
  tidyr::separate(
    .,
    col = .,
    into = c("OTU", "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
    sep = "; "
  )
rawgraphs_final.sectors <- left_join(
  rawgraphs.order.sectors,
  loading.plots.rawgraphs.sectors
)
# Write the table to disk (no row names, no quoting).
write.csv(
  rawgraphs_final.sectors,
  file = "/home/marko-terzin/Desktop/MINT_sPLS_DA_for_rawgraphs_sectors.csv",
  row.names = FALSE,
  quote = FALSE
)
# Put the group-contribution table into the same MAG order as the stored
# cim column labels: a one-column OTU table in that order is used as the
# left-hand side of the join.
# Approach taken from:
# https://syntaxfix.com/question/22528/how-do-you-specifically-order-ggplot2-x-axis-instead-of-alphabetical-order
group.contrib.order.sectors <- as.data.frame(rawgraphs.order.sectors$OTU)
names(group.contrib.order.sectors) <- "OTU"
loading.plots.rawgraphs.sectors <- left_join(
  group.contrib.order.sectors,
  loading.plots.rawgraphs.sectors
)
# Group contributions as a tile plot: one row per MAG, one column per sector,
# filled by the class stored in each "*GroupContrib" column.
# Every column except OTU is pivoted, so the reshape no longer depends on a
# hard-coded column range (previously 2:8).
loading.plots.long.sectors <- pivot_longer(
  loading.plots.rawgraphs.sectors,
  cols = -OTU
)
# Plotting
ggplot(
  loading.plots.long.sectors,
  aes(
    # Keep the MAG order of the reordered table rather than alphabetical
    # order. `levels` is spelled out; the previous `level =` relied on
    # partial argument matching.
    y = factor(OTU, levels = unique(loading.plots.rawgraphs.sectors$OTU)),
    x = name,
    fill = value
  )
) +
  geom_tile() + # plot a heatmap
  scale_fill_manual(values = c("seagreen3",     # No-take zones
                               "steelblue4")) + # Take zones
  labs(x = "AIMS-LTMP sectors",
       y = "Indicator MAGs",
       title = "Take vs. no-take zones") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 12))
Saving as R object for rawgraphs.
# Exporting for RawGraphs
# Row-side colours for the no-Swains data: one colour per sample, taken from
# cols.open.closed by the integer code of Open_or_Closed_to_fishing
# (first level -> seagreen3, second -> steelblue4).
#
# The colour vector is indexed directly; palette(value) returns the palette
# that was active *before* the call, which is why the previous version only
# gave the intended colours when the statement was run twice.
cols.open.closed <- c("seagreen3", "steelblue4")
# Kept only for its side effect, so the session palette ends up in the same
# state as before this fix.
palette(cols.open.closed)
open.closed.cols <- cols.open.closed[
  as.integer(as.factor(
    pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing
  ))
]
# Group assignments for the MAGs selected on component 1 of the no-Swains
# model (160 according to the original note): plotLoadings() is called
# without `ndisplay`, and its return value is reshaped into a table of
# "*GroupContrib" columns.
loading.plots.open.closed.all.MAGs.sectors_no_Swains <- plotLoadings(
  final.mint.splsda.open.closed.sector_no_Swains,
  comp = 1,
  study = "all.partial",
  contrib = "max",
  method = "mean",
  # ndisplay = 30,
  name.var = MAGs.name.short,
  legend = TRUE,
  legend.color = c("seagreen3",   # Closed
                   "steelblue4"), # Open
  title = "Contribution on comp 1"
  # subtitle = paste("Study", 1:4)
)
# Convert to a data frame, keep only the group-contribution columns and move
# the row names into an explicit OTU column.
loading.plots.rawgraphs.sectors_no_Swains <- loading.plots.open.closed.all.MAGs.sectors_no_Swains %>%
  as.data.frame() %>%
  dplyr::select(ends_with("GroupContrib")) %>%
  rownames_to_column("OTU")
# Full display names for the no-Swains object: MAG ID followed by the complete
# taxonomy (Domain..Species), pasted together with "; ". The OTU table is the
# left-hand side of the join, so the labels follow its row order.
MAGs.name.full <- left_join(
  otu_table(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  tax_table(pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains) %>%
    as.data.frame() %>%
    rownames_to_column("OTU"),
  by = "OTU" # explicit key instead of relying on the shared-column default
) %>%
  tidyr::unite(taxonomy, c(OTU, Domain, Phylum, Class, Order, Family, Genus, Species), sep = "; ") # Adding Taxonomy info
MAGs.name.full <- as.character(MAGs.name.full$taxonomy)
# Transposed clustered image map for the no-Swains model, labelled with the
# full MAG names and stored so that its `col.names` element can be reused
# below.
# The legend colours are now derived from the no-Swains sample data, matching
# the model and the row-side colours; the previous version used the
# all-sector object here (copy-paste slip).
# NOTE(review): the legend colours come from color.mixo() while the row-side
# colours come from open.closed.cols - confirm the legend matches the rows.
mint.spls.da.rawgraphs.sectors_no_Swains <- cim(
  final.mint.splsda.open.closed.sector_no_Swains,
  comp = 1,
  # cutoff = 0.95,
  transpose = TRUE,
  title = "MINT sPLS-DA, component 1",
  xlab = "Indicator MAGs that discriminate between take and no take zones",
  ylab = "Reef sites",
  col.names = MAGs.name.full,
  row.names = TRUE,
  # row.sideColors = color.mixo(as.numeric(as.factor(metadata_IMOS_MGD_MAGs$Open_or_Closed_to_fishing))),
  row.sideColors = open.closed.cols,
  legend = list(
    color.mixo(as.numeric(as.factor(
      pMAGs_95ANI_phyloseq_CLR_per_sector_no_Swains@sam_data$Open_or_Closed_to_fishing
    ))),
    title = "Reef protection level"
  ),
  margins = c(37,  # bottom
              25), # right
  keysize = c(1, 0.4)
)
# Build the no-Swains RawGraphs table: take the column labels stored in the
# cim result, split each "ID; Domain; ...; Species" label back into separate
# columns, and attach the per-sector group contributions (joined on the
# shared OTU column).
# The piped vector becomes a one-column data frame; `col = .` refers to that
# column (presumably named "." after the pipe).
rawgraphs.order.sectors_no_Swains <- mint.spls.da.rawgraphs.sectors_no_Swains$col.names %>%
  as.data.frame() %>%
  tidyr::separate(
    .,
    col = .,
    into = c("OTU", "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
    sep = "; "
  )
rawgraphs_final.sectors_no_Swains <- left_join(
  rawgraphs.order.sectors_no_Swains,
  loading.plots.rawgraphs.sectors_no_Swains
)
# Write the table to disk (no row names, no quoting).
write.csv(
  rawgraphs_final.sectors_no_Swains,
  file = "/home/marko-terzin/Desktop/MINT_sPLS_DA_for_rawgraphs_sectors_no_Swains.csv",
  row.names = FALSE,
  quote = FALSE
)
# Put the no-Swains group-contribution table into the same MAG order as the
# stored cim column labels: a one-column OTU table in that order is used as
# the left-hand side of the join.
# Approach taken from:
# https://syntaxfix.com/question/22528/how-do-you-specifically-order-ggplot2-x-axis-instead-of-alphabetical-order
group.contrib.order.sectors_no_Swains <- as.data.frame(rawgraphs.order.sectors_no_Swains$OTU)
names(group.contrib.order.sectors_no_Swains) <- "OTU"
loading.plots.rawgraphs.sectors_no_Swains <- left_join(
  group.contrib.order.sectors_no_Swains,
  loading.plots.rawgraphs.sectors_no_Swains
)
# Group contributions (no-Swains model) as a tile plot: one row per MAG, one
# column per sector, filled by the class stored in each "*GroupContrib"
# column.
# Every column except OTU is pivoted, so the reshape no longer depends on a
# hard-coded column range (previously 2:7).
loading.plots.long.sectors_no_Swains <- pivot_longer(
  loading.plots.rawgraphs.sectors_no_Swains,
  cols = -OTU
)
# Plotting
ggplot(
  loading.plots.long.sectors_no_Swains,
  aes(
    # Keep the MAG order of the reordered table rather than alphabetical
    # order. `levels` is spelled out; the previous `level =` relied on
    # partial argument matching.
    y = factor(OTU, levels = unique(loading.plots.rawgraphs.sectors_no_Swains$OTU)),
    x = name,
    fill = value
  )
) +
  geom_tile() + # plot a heatmap
  scale_fill_manual(values = c("seagreen3",     # No-take zones
                               "steelblue4")) + # Take zones
  labs(x = "6 AIMS-LTMP sectors",
       y = "Indicator MAGs",
       title = "Take vs. no-take zones") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 12))
Saving as R object for rawgraphs.
Use of the auroc() function will yield a visualisation of classification performance when undergoing the LOGOCV procedure from above. The interpretation of this output may not be particularly insightful in relation to the performance evaluation of our methods, but can complement the statistical analysis. For example, the model's discrimination between open and closed reefs on component 1 had an AUC of ~0.74 (see the output below).
# ROC/AUC for component 1 of the all-sector model; the result is stored and
# the function's own printing is switched off.
MINT_sPLS_da_open_closed_model_performance.sectors <- auroc(
  final.mint.splsda.open.closed.sector,
  roc.comp = 1,
  print = FALSE
)
ROC curve and AUC from the MINT sPLS-DA performed on the IMOS GBR-MGD MAGs (876 MAGs, dereplicated in dRep at 95% ANI) for global component 1 for the open vs. closed reefs comparison. Numerical outputs include the AUC (0.7354) and a Wilcoxon test p-value (2.146e-08) for the open vs. closed reefs class comparison, which are computed per component.
# Show the stored AUC table for component 1.
MINT_sPLS_da_open_closed_model_performance.sectors[["Comp1"]]
## AUC p-value
## C vs O 0.7354 2.146e-08
# ROC/AUC for component 1 of the model fitted without Swains; the result is
# stored and the function's own printing is switched off.
MINT_sPLS_da_open_closed_model_performance.sectors_no_Swains <- auroc(
  final.mint.splsda.open.closed.sector_no_Swains,
  roc.comp = 1,
  print = FALSE
)
ROC curve and AUC from the MINT sPLS-DA performed on the IMOS GBR-MGD MAGs (876 MAGs, dereplicated in dRep at 95% ANI) with the Swains sector excluded, for global component 1 for the open vs. closed reefs comparison. Numerical outputs include the AUC (0.7347) and a Wilcoxon test p-value (1.275e-07) for the open vs. closed reefs class comparison, which are computed per component.
# Show the stored AUC table for component 1 (no-Swains model).
MINT_sPLS_da_open_closed_model_performance.sectors_no_Swains[["Comp1"]]
## AUC p-value
## C vs O 0.7347 1.275e-07
We use the predict() function to predict the class membership of new test samples from an external study - Swains. Specifically, we train the MINT model on the remaining 6 GBR sectors, then predict on the Swains sector. This process exactly reflects the inner workings of the tune() and perf() functions using LOGOCV. Here during our model training on the six sectors, we will set the parameters (i.e. choose ncomp and keepX) as in the tuning steps previously, using the model final.mint.splsda.open.closed.sector_no_Swains.
# Hold out one study, retrain on the remaining studies, and predict the
# held-out samples.
# NOTE(review): the accompanying text describes holding out the Swains
# sector, but the label used here is "3" - confirm it is a level of `study`.
indiv.test <- which(study == "3")
# Negative indexing with an empty index vector selects *nothing*, so an
# unmatched label would otherwise silently yield an empty training set.
if (length(indiv.test) == 0) {
  stop("No samples found for the held-out study; check the study label.",
       call. = FALSE)
}
# We train on the remaining studies, with pre-tuned parameters.
# drop = FALSE keeps X two-dimensional even if a single sample is selected.
mint.splsda.stem2 <- mint.splsda(
  X = X[-indiv.test, , drop = FALSE],
  Y = Y[-indiv.test],
  study = droplevels(study[-indiv.test]),
  ncomp = 1,
  keepX = 30
)
mint.predict.stem <- predict(
  mint.splsda.stem2,
  newdata = X[indiv.test, , drop = FALSE],
  dist = "centroids.dist",
  study.test = factor(study[indiv.test])
)
# Store class prediction with a model with 1 comp
indiv.prediction <- mint.predict.stem$class$centroids.dist[, 1]
# The confusion matrix compares the real classes with the predicted classes
conf.mat <- get.confusion_matrix(truth = Y[indiv.test],
                                 predicted = indiv.prediction)
conf.mat
# =============================================
# MINT sPLS-DA Prediction on External Dataset (Swains)
# =============================================
# --------------------------------------------
# Step 1: Prepare the Swains test data (CLR-transformed)
# --------------------------------------------
# CLR transform of the Swains-only phyloseq object
pMAGs_95ANI_phyloseq_clr_Swains_only <- microbiome::transform(
  pMAGs_95ANI_phyloseq_Swains_only,
  transform = "clr"
)
# Extract the OTU table (MAGs) and transpose it
# (assumes taxa are rows, so the result is samples x features - TODO confirm)
MAGs_Swains_only <- as.data.frame(
  pMAGs_95ANI_phyloseq_clr_Swains_only@otu_table
)
MAGs_Swains_only <- t(MAGs_Swains_only)
# True labels (Open_or_Closed_to_fishing) from the sample data, as a factor
true_labels_Swains <- factor(
  pMAGs_95ANI_phyloseq_clr_Swains_only@sam_data$Open_or_Closed_to_fishing
)
# Keep exactly the features of the training data, in the same order.
# drop = FALSE keeps a matrix even if only one sample or feature remains.
MAGs_Swains_only <- MAGs_Swains_only[
  ,
  colnames(final.mint.splsda.open.closed.sector_no_Swains$X),
  drop = FALSE
]
# --------------------------------------------
# Step 2: Define the study membership of the test samples
# --------------------------------------------
# All test samples belong to one study, "06_Swains". The levels of the
# training study factor must NOT be assigned onto this factor: doing so
# renames the single level positionally, so every Swains sample ends up
# labelled as the first training study.
# NOTE: the confusion matrix and accuracy recorded in this document were
# produced with the relabelled factor and should be regenerated.
study.test <- factor(rep("06_Swains", nrow(MAGs_Swains_only)))
# --------------------------------------------
# Step 3: Predict on Swains using the pre-trained MINT model
# --------------------------------------------
mint.predict.Swains <- predict(
  final.mint.splsda.open.closed.sector_no_Swains, # Pre-trained model
  newdata = MAGs_Swains_only,                     # Swains test data
  dist = "centroids.dist",                        # Distance metric
  study.test = study.test                         # Study assignment for test data
)
# --------------------------------------------
# Step 4: Extract predictions and evaluate
# --------------------------------------------
# Predicted classes taken from the second column of the class matrix
# (using the first two components, per the original note)
indiv.prediction <- mint.predict.Swains$class$centroids.dist[, 2]
# Confusion matrix of true vs. predicted labels
conf.mat <- get.confusion_matrix(
  truth = true_labels_Swains,  # True labels for Swains
  predicted = indiv.prediction # Predicted labels
)
# Print confusion matrix
print(conf.mat)
## predicted.as.C predicted.as.O
## C 4 0
## O 9 7
# Overall accuracy: sum of the diagonal of the confusion matrix divided by
# the sum of all its cells.
accuracy <- sum(diag(conf.mat)) / sum(conf.mat)
print(paste("Accuracy:", round(accuracy, digits = 3)))
## [1] "Accuracy: 0.55"
# MINT sPLS-DA with two components, sector as the study factor, and a fixed
# number of features per component (60 and 10) instead of the tuned keepX.
final.mint.splsda.open.closed.sector.DB_paper <- mint.splsda(
  X = MAGs,
  Y = pMAGs_95ANI_phyloseq_clr@sam_data$Open_or_Closed_to_fishing,
  study = pMAGs_95ANI_phyloseq_clr@sam_data$SECTOR_N_S,
  ncomp = 2, # based on the BER
  # keepX = tune.mint.spls_da.optimal.keepX.sector,
  keepX = c(60, 10) # Uncomment for arbitrary number of features
)
# Performance assessment of the fitted model with perf() defaults.
perf.final.mint.splsda.open.closed.sector.DB_paper <- perf(
  final.mint.splsda.open.closed.sector.DB_paper
)
# Overall error rates per study: for every entry of `study.specific.error`,
# take its `overall` element and prepend the study name as a "Study" column.
# The result is a list named by study.
study_errors_overall <- lapply(
  setNames(nm = names(perf.final.mint.splsda.open.closed.sector.DB_paper[["study.specific.error"]])),
  function(sector) {
    cbind(
      Study = sector,
      perf.final.mint.splsda.open.closed.sector.DB_paper$study.specific.error[[sector]]$overall
    )
  }
)
# Stack the per-study pieces into one table
study_error_df_overall <- do.call(rbind, study_errors_overall)
# Display the table
knitr::kable(
  as.data.frame(study_error_df_overall),
  caption = "MINT sPLS-DA - error rate (centroids distance) across GBR sectors"
)
| | Study | max.dist | centroids.dist | mahalanobis.dist |
|---|---|---|---|---|
| comp1 | 01_Cape_Grenville | 0.5 | 0.541666666666667 | 0.541666666666667 |
| comp2 | 01_Cape_Grenville | 0.583333333333333 | 0.5 | 0.583333333333333 |
| comp1.1 | 02_Princess_Charlotte_bay | 0.4 | 0.4 | 0.4 |
| comp2.1 | 02_Princess_Charlotte_bay | 0.733333333333333 | 0.4 | 0.733333333333333 |
| comp1.2 | 03_Cairns | 0.3 | 0.3 | 0.3 |
| comp2.2 | 03_Cairns | 0.35 | 0.35 | 0.4 |
| comp1.3 | 04_Innisfail | 0.148148148148148 | 0.148148148148148 | 0.148148148148148 |
| comp2.3 | 04_Innisfail | 0.185185185185185 | 0.148148148148148 | 0.185185185185185 |
| comp1.4 | 05_Townsville | 0.25 | 0.25 | 0.25 |
| comp2.4 | 05_Townsville | 0.285714285714286 | 0.25 | 0.285714285714286 |
| comp1.5 | 06_Swains | 0.5 | 0.5 | 0.5 |
| comp2.5 | 06_Swains | 0.4 | 0.5 | 0.4 |
| comp1.6 | 07_Capricorn_Bunker | 0.5 | 0.535714285714286 | 0.535714285714286 |
| comp2.6 | 07_Capricorn_Bunker | 0.428571428571429 | 0.535714285714286 | 0.392857142857143 |
# Per-class error rates per study (centroids distance): for every entry of
# `study.specific.error`, take `error.rate.class$centroids.dist` and prepend
# the study name as a "Study" column. The result is a list named by study.
study_errors <- lapply(
  setNames(nm = names(perf.final.mint.splsda.open.closed.sector.DB_paper[["study.specific.error"]])),
  function(sector) {
    cbind(
      Study = sector,
      perf.final.mint.splsda.open.closed.sector.DB_paper$study.specific.error[[sector]]$error.rate.class$centroids.dist
    )
  }
)
# Stack the per-study pieces into one table
study_error_df <- do.call(rbind, study_errors)
# Display the table
knitr::kable(
  as.data.frame(study_error_df),
  caption = "MINT sPLS-DA - error rate (centroids distance) across GBR sectors, and separately for C and O"
)
| | Study | comp1 | comp2 |
|---|---|---|---|
| C | 01_Cape_Grenville | 0.583333333333333 | 0.5 |
| O | 01_Cape_Grenville | 0.5 | 0.5 |
| C.1 | 02_Princess_Charlotte_bay | 0.375 | 0.375 |
| O.1 | 02_Princess_Charlotte_bay | 0.428571428571429 | 0.428571428571429 |
| C.2 | 03_Cairns | 0.125 | 0.25 |
| O.2 | 03_Cairns | 0.416666666666667 | 0.416666666666667 |
| C.3 | 04_Innisfail | 0.2 | 0.2 |
| O.3 | 04_Innisfail | 0.0833333333333333 | 0.0833333333333333 |
| C.4 | 05_Townsville | 0.321428571428571 | 0.357142857142857 |
| O.4 | 05_Townsville | 0.178571428571429 | 0.142857142857143 |
| C.5 | 06_Swains | 0.25 | 0.25 |
| O.5 | 06_Swains | 0.5625 | 0.5625 |
| C.6 | 07_Capricorn_Bunker | 0.5 | 0.5 |
| O.6 | 07_Capricorn_Bunker | 0.583333333333333 | 0.583333333333333 |
# TODO: check whether the value reported here is consistent with the mixOmics
# tuning plots; if it is not, find out whether that is expected.
# ROC/AUC for component 1 of the DB_paper model, stored without printing.
final.mint.splsda.open.closed.sector.DB_paper_model_performance <- auroc(
  final.mint.splsda.open.closed.sector.DB_paper,
  roc.comp = 1,
  print = FALSE
)
final.mint.splsda.open.closed.sector.DB_paper_model_performance[["Comp1"]]
# Global sample plot on components 1 and 2, with ellipses and one colour per
# class.
plotIndiv(
  final.mint.splsda.open.closed.sector.DB_paper,
  study = "global",
  comp = c(1, 2), # We can look into comp 3 too, but I think 2 dimensions are enough
  ellipse = TRUE,
  legend = TRUE,
  col.per.group = c("seagreen3",   # Closed to fishing
                    "steelblue4"), # open to fishing
  title = "MINT sPLS-DA using IMOS GBR-MGD MAGs | PCs 1-2",
  subtitle = "Reefs that are open or closed to fishing"
)
# The sectors should not be drawn as symbols, so the sample plot is rebuilt
# in ggplot2 from the model's X-variates instead of using mixOmics plotting.
variates_data <- as.data.frame(final.mint.splsda.open.closed.sector.DB_paper$variates$X)
variates_data$Group <- final.mint.splsda.open.closed.sector.DB_paper$Y
# Explained variance on dimensions 1 and 2, entered by hand from the previous
# plot (update these if the model changes).
explained_var1 <- 0.23 # Replace with actual value for comp1
explained_var2 <- 0.05 # Replace with actual value for comp2
ggplot(
  variates_data,
  aes(x = comp1, y = comp2, color = Group, fill = Group)
) +
  geom_point() +
  # Ellipses drawn as outlines ("path") rather than filled polygons
  stat_ellipse(aes(color = Group), geom = "path", linewidth = 1) +
  # stat_ellipse(geom = "polygon", alpha = 0.1, aes(fill = factor(Group)), color = NA) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", alpha = 0.3) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", alpha = 0.3) +
  scale_color_manual(values = c("seagreen3", "steelblue4")) + # Coloring the points
  scale_fill_manual(values = c("seagreen3", "steelblue4")) + # Coloring the ellipses
  labs(
    x = paste0("X-variate 1 (Variance Explained: ", explained_var1 * 100, "%)"),
    y = paste0("X-variate 2 (Variance Explained: ", explained_var2 * 100, "%)"),
    title = "MINT sPLS-DA using the 876 IMOS GBR-MGD pMAGs (95% ANI) | PCs 1-2",
    subtitle = "top 60 most discriminatory pMAGs"
  ) +
  theme_minimal()
# Per-sector ("all.partial") loading plot for component 1 of the DB_paper
# model, without an `ndisplay` limit. The returned object is stored and then
# printed.
# NOTE(review): MAGs.name.short holds whatever was assigned last in the
# session - confirm it corresponds to the data this model was fitted on.
loading.plots.open.closed.per.sector.Steve <- plotLoadings(
  final.mint.splsda.open.closed.sector.DB_paper,
  comp = 1,
  study = "all.partial",
  contrib = "max",
  method = "mean",
  # ndisplay = 60,
  name.var = MAGs.name.short,
  legend = TRUE,
  legend.color = c("seagreen3",   # Closed
                   "steelblue4"), # Open
  title = "Contribution on comp 1"
  # subtitle = paste("Study", 1:4)
)
loading.plots.open.closed.per.sector.Steve
We can also look at the averaged contribution across MINT studies (sectors):
# Loading plot for component 1 of the DB_paper model without a `study`
# argument, i.e. not split per sector. The returned object is stored and then
# printed.
loading.plots.open.closed.across.sectors.DB <- plotLoadings(
  final.mint.splsda.open.closed.sector.DB_paper,
  comp = 1,
  # study = "all.partial",
  contrib = "max",
  method = "mean",
  ndisplay = 60,
  name.var = MAGs.name.short,
  legend = TRUE,
  legend.color = c("seagreen3",   # Closed
                   "steelblue4"), # Open
  title = "Contribution on comp 1"
  # subtitle = paste("Study", 1:4)
)
loading.plots.open.closed.across.sectors.DB
# Save the whole workspace to an .RData file at a fixed, machine-specific
# absolute path (adjust the path when running on another machine).
save.image(file = "/home/marko-terzin/Documents/PhD/Thesis/Chapter_3_IMOS-MGD_MAGs/cleaned_code/Figure_2/Figure_2.RData")
The final map (Fig. 2) was created in Inkscape.